├── .gitignore ├── src ├── test │ ├── resources │ │ ├── cleanTestTable.sql │ │ ├── issues │ │ │ └── 8 │ │ │ │ ├── dropIdTable.sql │ │ │ │ └── idTable.sql │ │ ├── initSchemaStatement.sql │ │ ├── logback.xml │ │ ├── timezoneTest.sql │ │ └── initTestTable.sql │ └── kotlin │ │ └── com │ │ └── github │ │ └── thake │ │ └── logminer │ │ └── kafka │ │ └── connect │ │ ├── TimestampWithTimezoneTest.kt │ │ ├── TimestampWithLocalTimezoneTest.kt │ │ ├── SourceConnectorConfigTest.kt │ │ ├── AbstractCdcSourceIntegrationTest.kt │ │ ├── BigDecimalTypeTest.kt │ │ ├── ConnectionExtensions.kt │ │ ├── logminer │ │ └── TransactionTest.kt │ │ ├── DMLOperationsTest.kt │ │ ├── issues │ │ ├── Issue8Test.kt │ │ └── Issue14Test.kt │ │ ├── LongStatementsTest.kt │ │ ├── ChangingTableTest.kt │ │ ├── SelectSourceTest.kt │ │ ├── OffsetIntegrationTest.kt │ │ ├── TimezoneTest.kt │ │ ├── AbstractIntegrationTest.kt │ │ ├── SchemaServiceTest.kt │ │ └── SourceTaskTest.kt └── main │ └── kotlin │ └── com │ └── github │ └── thake │ └── logminer │ └── kafka │ └── connect │ ├── PollResult.kt │ ├── Operation.kt │ ├── Source.kt │ ├── ConnectNameService.kt │ ├── CdcRecord.kt │ ├── TableId.kt │ ├── SourceDatabaseNameService.kt │ ├── LogminerSourceConnector.kt │ ├── Offset.kt │ ├── initial │ ├── TableFetcher.kt │ └── SelectSource.kt │ ├── logminer │ ├── TransactionConsolidator.kt │ ├── LogminerRow.kt │ ├── LogminerSource.kt │ ├── LogminerFetcher.kt │ ├── Transaction.kt │ └── LogminerSession.kt │ ├── SchemaService.kt │ ├── ConnectSchemaFactory.kt │ ├── SourceTask.kt │ ├── SourceConnectorConfig.kt │ └── ColumnSchemaBuilder.kt ├── settings.gradle.kts ├── renovate.json ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── logminer-kafka-connect.properties ├── .github └── workflows │ ├── gradle.yml │ └── detekt.yml ├── manifest.json ├── gradlew.bat ├── gradlew ├── Readme.md └── LICENSE /.gitignore: -------------------------------------------------------------------------------- 1 | .gradle/ 2 | .idea/ 3 | build/ 4 | gradle.properties -------------------------------------------------------------------------------- /src/test/resources/cleanTestTable.sql: -------------------------------------------------------------------------------- 1 | DROP USER SIT CASCADE; -------------------------------------------------------------------------------- /settings.gradle.kts: -------------------------------------------------------------------------------- 1 | rootProject.name = "logminer-kafka-connect" 2 | 3 | -------------------------------------------------------------------------------- /renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": [ 3 | "config:base" 4 | ] 5 | } 6 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thake/logminer-kafka-connect/HEAD/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /src/test/resources/issues/8/dropIdTable.sql: -------------------------------------------------------------------------------- 1 | ALTER TABLE SIT.ID_TEST_TABLE DROP PRIMARY KEY; 2 | DROP INDEX SIT.ID_TEST_TABLE_PK; 3 | DROP TABLE SIT.ID_TEST_TABLE; -------------------------------------------------------------------------------- /src/test/resources/initSchemaStatement.sql: 
-------------------------------------------------------------------------------- 1 | CREATE USER SIT IDENTIFIED BY "SIT" DEFAULT TABLESPACE USERS; 2 | alter user system quota unlimited on users; 3 | alter user sit quota unlimited on users; -------------------------------------------------------------------------------- /src/main/kotlin/com/github/thake/logminer/kafka/connect/PollResult.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect 2 | 3 | data class PollResult( 4 | val cdcRecord: CdcRecord, 5 | val offset: Offset 6 | ) -------------------------------------------------------------------------------- /src/main/kotlin/com/github/thake/logminer/kafka/connect/Operation.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect 2 | 3 | enum class Operation(val stringRep: String) { 4 | READ("r"), 5 | UPDATE("u"), 6 | INSERT("i"), 7 | DELETE("d") 8 | } -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-6.9.4-bin.zip 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | -------------------------------------------------------------------------------- /src/main/kotlin/com/github/thake/logminer/kafka/connect/Source.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect 2 | 3 | import java.sql.Connection 4 | 5 | interface Source<T : Offset?> : AutoCloseable { 6 | fun getOffset(): T 7 | fun maybeStartQuery(db: Connection) 8 | fun poll(): List<PollResult> 9 | } -------------------------------------------------------------------------------- /src/main/kotlin/com/github/thake/logminer/kafka/connect/ConnectNameService.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect 2 | 3 | interface ConnectNameService { 4 | fun getTopicName(table: TableId): String 5 | fun getValueRecordName(table: TableId): String 6 | fun getKeyRecordName(table: TableId): String 7 | fun getBeforeAfterStructName(table: TableId): String 8 | } -------------------------------------------------------------------------------- /logminer-kafka-connect.properties: -------------------------------------------------------------------------------- 1 | name=logminer-kafka-connect 2 | connector.class=com.github.thake.logminer.kafka.connect.LogminerSourceConnector 3 | start.scn=0 4 | db.name=chosen-alias-name 5 | db.sid=XE 6 | db.hostname=127.0.0.1 7 | db.port=1521 8 | db.user=dbUser 9 | db.user.password=dbPassword 10 | db.fetch.size=10000 11 | batch.size=1000 12 | poll.interval.ms=500 13 | table.whitelist=MYSCHEMA.TABLE_NAME, OTHERSCHEMA -------------------------------------------------------------------------------- /src/test/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /src/main/kotlin/com/github/thake/logminer/kafka/connect/CdcRecord.kt:
-------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect 2 | 3 | import java.sql.Timestamp 4 | 5 | data class CdcRecord( 6 | val scn: Long, 7 | val rowId: String, 8 | val table: TableId, 9 | val timestamp: Timestamp, 10 | val operation: Operation, 11 | val transaction: String, 12 | val username: String?, 13 | val dataSchema: SchemaDefinition, 14 | val before: Map<String, Any?>?, 15 | val after: Map<String, Any?>? 16 | ) -------------------------------------------------------------------------------- /src/test/resources/timezoneTest.sql: -------------------------------------------------------------------------------- 1 | CREATE USER SIT IDENTIFIED BY "SIT" DEFAULT TABLESPACE USERS; 2 | alter user system quota unlimited on users; 3 | alter user sit quota unlimited on users; 4 | ALTER DATABASE SET TIME_ZONE = 'Europe/Berlin'; 5 | create table SIT.TIME_TEST 6 | ( 7 | id NUMBER(8) 8 | constraint TIME_TEST_pk 9 | primary key, 10 | time TIMESTAMP not null, 11 | time_with_time_zone TIMESTAMP WITH TIME ZONE not null, 12 | time_with_local_time_zone TIMESTAMP WITH LOCAL TIME ZONE not null 13 | ); -------------------------------------------------------------------------------- /src/test/kotlin/com/github/thake/logminer/kafka/connect/TimestampWithTimezoneTest.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect 2 | 3 | import com.github.thake.logminer.kafka.connect.SchemaType.TimeType.TimestampType 4 | import io.kotest.matchers.nulls.shouldNotBeNull 5 | import io.kotest.matchers.should 6 | import io.kotest.matchers.shouldBe 7 | import org.junit.jupiter.api.Test 8 | 9 | class TimestampWithTimezoneTest { 10 | @Test 11 | fun testCorrectlyParsed(){ 12 | val timestamp = "2020-09-24 03:06:31.489000 US/Pacific PDT" 13 | TimestampType.TimestampWithTimezone(6).convert(timestamp).should{ 14 | it.shouldNotBeNull() 15 | } 16 | } 17 | } -------------------------------------------------------------------------------- /.github/workflows/gradle.yml: -------------------------------------------------------------------------------- 1 | # This workflow will build a Java project with Gradle 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-gradle 3 | 4 | name: CI-Build 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | - name: Set up JDK 8 20 | uses: actions/setup-java@v3 21 | with: 22 | distribution: 'temurin' 23 | java-version: '8' 24 | - name: Grant execute permission for gradlew 25 | run: chmod +x gradlew 26 | - name: Build with Gradle 27 | run: ./gradlew test 28 | -------------------------------------------------------------------------------- /src/test/resources/initTestTable.sql: -------------------------------------------------------------------------------- 1 | CREATE USER SIT IDENTIFIED BY "SIT" DEFAULT TABLESPACE USERS; 2 | alter user system quota unlimited on users; 3 | alter user sit quota unlimited on users; 4 | create table SIT.TEST_TAB 5 | ( 6 | id NUMBER(8) 7 | constraint TEST_TAB_pk 8 | primary key, 9 | time TIMESTAMP not null, 10 | string VARCHAR2(255), 11 | "integer" NUMBER(7) not null, 12 | "long" NUMBER(14), 13 | "date" date not null, 14 | big_decimal NUMBER default 0 15 | ); 16 | create table SIT.SECOND_TAB 17 | ( 18 | id NUMBER(8), 19 | time TIMESTAMP not null, 20 |
string VARCHAR2(255), 21 | "integer" NUMBER(6) not null, 22 | "long" NUMBER(14), 23 | "date" date not null, 24 | big_decimal NUMBER default 0 25 | ); 26 | -------------------------------------------------------------------------------- /src/test/kotlin/com/github/thake/logminer/kafka/connect/TimestampWithLocalTimezoneTest.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect 2 | 3 | import com.github.thake.logminer.kafka.connect.SchemaType.TimeType.TimestampType 4 | import io.kotest.matchers.nulls.shouldNotBeNull 5 | import io.kotest.matchers.should 6 | import io.kotest.matchers.shouldBe 7 | import org.junit.jupiter.api.Test 8 | import java.time.Duration 9 | import java.time.ZoneId 10 | import java.time.ZonedDateTime 11 | import java.util.* 12 | 13 | class TimestampWithLocalTimezoneTest { 14 | @Test 15 | fun testCorrectlyParsed(){ 16 | val timestamp = "2020-09-24 10:11:26.684000+00:00" 17 | TimestampType.TimestampWithLocalTimezone(6).convert(timestamp).should{ 18 | it.shouldNotBeNull() 19 | it.shouldBe(Date.from(ZonedDateTime.of(2020,9,24,10,11,26, Duration.ofMillis(684).nano 20 | ,ZoneId.of("UTC")).toInstant())) 21 | } 22 | } 23 | } -------------------------------------------------------------------------------- /src/test/resources/issues/8/idTable.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE SIT.ID_TEST_TABLE 2 | ( 3 | ID NUMBER NOT NULL, 4 | NAME VARCHAR2(50 BYTE), 5 | VALUE NUMBER 6 | ) 7 | PCTUSED 0 8 | PCTFREE 10 9 | INITRANS 1 10 | MAXTRANS 255 11 | STORAGE 12 | ( 13 | INITIAL 64K 14 | NEXT 1M 15 | MINEXTENTS 1 16 | MAXEXTENTS UNLIMITED 17 | PCTINCREASE 0 18 | BUFFER_POOL DEFAULT 19 | ) 20 | LOGGING 21 | NOCOMPRESS 22 | NOCACHE 23 | MONITORING; 24 | 25 | CREATE UNIQUE INDEX SIT.ID_TEST_TABLE_PK ON SIT.ID_TEST_TABLE 26 | (ID) 27 | LOGGING 28 | PCTFREE 10 29 | INITRANS 2 30 | MAXTRANS 255 31 | STORAGE ( 32 | INITIAL 64 K 33 | NEXT 1 M 34 | MINEXTENTS 1 35 | MAXEXTENTS UNLIMITED 36 | PCTINCREASE 0 37 | BUFFER_POOL DEFAULT 38 | ); 39 | 40 | ALTER TABLE SIT.ID_TEST_TABLE 41 | ADD ( 42 | CONSTRAINT ID_TEST_TABLE_PK 43 | PRIMARY KEY 44 | (ID) 45 | USING INDEX SIT.ID_TEST_TABLE_PK 46 | ENABLE VALIDATE); -------------------------------------------------------------------------------- /src/main/kotlin/com/github/thake/logminer/kafka/connect/TableId.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect 2 | 3 | import java.net.ConnectException 4 | 5 | data class TableId(val owner: String, val table: String) { 6 | val fullName: String = "$owner.$table" 7 | val recordName: String = getNormalizedTableName(table) 8 | 9 | init { 10 | if (!table.matches("^[\\w.-_]+$".toRegex())) { 11 | throw ConnectException("Invalid table name $table for kafka topic.Check table name which must consist only a-z, A-Z, '0-9', ., - and _") 12 | } 13 | } 14 | 15 | private fun getNormalizedTableName(tableName: String): String { 16 | var structName = tableName.substring(0, 1).toUpperCase() + tableName.substring(1).toLowerCase() 17 | if (structName.endsWith("_t")) { 18 | structName = structName.substring(0, structName.length - 2) 19 | } 20 | return structName 21 | } 22 | 23 | companion object { 24 | fun ofFullName(fullname: String): TableId { 25 | val (owner, table) = fullname.split(".") 26 | return TableId(owner, table) 27 | } 28 | } 29 | } 
-------------------------------------------------------------------------------- /manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "component_types": [ 3 | "source" 4 | ], 5 | "name": "${name}", 6 | "title": "Logminer Kafka Connect", 7 | "version": "${version}", 8 | "description": "Logminer Kafka Connect is a CDC Kafka Connect source for Oracle Databases (tested with Oracle 11.2.0.4) using Oracle Logminer.", 9 | "documentation_url": "https://github.com/thake/logminer-kafka-connect", 10 | "features": { 11 | "confluent_control_center_integration": false, 12 | "delivery_guarantee": [ 13 | "at_least_once" 14 | ], 15 | "kafka_connect_api": true, 16 | "single_message_transforms": true, 17 | "supported_encodings": [ 18 | "any" 19 | ] 20 | }, 21 | "license": [ 22 | { 23 | "name": "Apache License, Version 2.0", 24 | "url": "http://www.apache.org/licenses/LICENSE-2.0" 25 | } 26 | ], 27 | "owner": { 28 | "name": "Thorsten Hake", 29 | "type": "user", 30 | "url": "https://thorsten-hake.com/", 31 | "username": "thake" 32 | }, 33 | "requirements": [ 34 | "Oracle Database SE" 35 | ], 36 | "tags": [ 37 | "oracle", 38 | "cdc", 39 | "logminer", 40 | "database" 41 | ] 42 | } -------------------------------------------------------------------------------- /src/main/kotlin/com/github/thake/logminer/kafka/connect/SourceDatabaseNameService.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect 2 | 3 | class SourceDatabaseNameService(private val logicalDatabaseName: String) : ConnectNameService { 4 | private val cachedNames = mutableMapOf() 5 | private fun String.sanitizeName(): String { 6 | fun isValidChar(c: Char) = c == '-' || c == '.' 
|| c == '_' || c in 'A'..'Z' || c in 'a'..'z' || c in '0'..'9' 7 | val builder = StringBuilder() 8 | this.forEach { 9 | builder.append( 10 | if (!isValidChar(it)) { 11 | '_' 12 | } else { 13 | it 14 | } 15 | ) 16 | } 17 | return builder.toString() 18 | } 19 | 20 | override fun getTopicName(table: TableId) = 21 | cachedNames.getOrPut(table, { "$logicalDatabaseName.${table.fullName}".sanitizeName() }) 22 | 23 | override fun getValueRecordName(table: TableId) = getTopicName(table) + ".Envelope" 24 | 25 | override fun getKeyRecordName(table: TableId) = getTopicName(table) + ".Key" 26 | 27 | override fun getBeforeAfterStructName(table: TableId) = getTopicName(table) + ".Value" 28 | } -------------------------------------------------------------------------------- /src/test/kotlin/com/github/thake/logminer/kafka/connect/SourceConnectorConfigTest.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect 2 | 3 | import io.kotest.matchers.shouldBe 4 | import org.junit.jupiter.api.Test 5 | import org.testcontainers.junit.jupiter.Testcontainers 6 | import java.time.ZoneId 7 | 8 | @Testcontainers 9 | class SourceConnectorConfigTest : AbstractIntegrationTest() { 10 | override fun getInitScript() = "timezoneTest.sql" 11 | private val underTest by lazy { 12 | SourceConnectorConfig(with(SourceConnectorConfig.Companion) { 13 | mapOf( 14 | BATCH_SIZE to "1000", 15 | DB_NAME to "test", 16 | DB_FETCH_SIZE to "10000", 17 | DB_SID to oracle.sid, 18 | DB_HOST to oracle.containerIpAddress, 19 | DB_PORT to oracle.oraclePort.toString(), 20 | DB_USERNAME to oracle.username, 21 | DB_PASSWORD to oracle.password, 22 | START_SCN to "0", 23 | MONITORED_TABLES to STANDARD_TABLE.fullName + ", " + SECOND_TABLE.fullName 24 | ) 25 | }) 26 | } 27 | @Test 28 | fun testTimezoneSetCorrectly(){ 29 | underTest.dbZoneId.shouldBe(ZoneId.of("UTC")) 30 | } 31 | 32 | } -------------------------------------------------------------------------------- /src/test/kotlin/com/github/thake/logminer/kafka/connect/AbstractCdcSourceIntegrationTest.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect 2 | 3 | import com.github.thake.logminer.kafka.connect.logminer.LogminerConfiguration 4 | import com.github.thake.logminer.kafka.connect.logminer.LogminerSource 5 | import org.junit.jupiter.api.AfterEach 6 | import org.junit.jupiter.api.BeforeEach 7 | import org.testcontainers.junit.jupiter.Testcontainers 8 | import java.time.ZoneId 9 | 10 | @Testcontainers 11 | abstract class AbstractCdcSourceIntegrationTest : AbstractIntegrationTest() { 12 | private lateinit var cdcSource: LogminerSource 13 | 14 | protected open val tableSelector: TableSelector 15 | get() = TableSelector(OWNER, TABLE_NAME) 16 | 17 | 18 | fun getCdcSource(dictionarySource : LogminerDictionarySource) : LogminerSource { 19 | cdcSource = createCdcSource(dictionarySource) 20 | return cdcSource 21 | } 22 | 23 | @AfterEach 24 | fun tearDownCdcSource() { 25 | cdcSource.stopLogminer() 26 | } 27 | 28 | protected fun createCdcSource(logminerDictionarySource: LogminerDictionarySource, offset: OracleLogOffset = OracleLogOffset.create(0, 0, true)) = 29 | LogminerSource( 30 | config = LogminerConfiguration( 31 | listOf( 32 | tableSelector 33 | ), 34 | logminerDictionarySource = logminerDictionarySource 35 | ), 36 | offset = offset, 37 | schemaService = SchemaService(SourceDatabaseNameService("A"),defaultZone) 38 | ) 39 | } 
-------------------------------------------------------------------------------- /src/main/kotlin/com/github/thake/logminer/kafka/connect/LogminerSourceConnector.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect 2 | 3 | import org.apache.kafka.common.config.ConfigDef 4 | import org.apache.kafka.connect.connector.Task 5 | import org.apache.kafka.connect.errors.ConnectException 6 | import org.apache.kafka.connect.source.SourceConnector 7 | import java.util.* 8 | 9 | class LogminerSourceConnector : SourceConnector() { 10 | private lateinit var config: SourceConnectorConfig 11 | 12 | companion object { 13 | const val version = "1.0" 14 | const val name = "logminer-kafka-connect" 15 | 16 | } 17 | 18 | override fun version(): String { 19 | return LogminerSourceConnector.version 20 | } 21 | 22 | override fun start(map: Map<String, String>) { 23 | config = SourceConnectorConfig(map) 24 | val dbName: String = config.dbName 25 | if (dbName == "") { 26 | throw ConnectException("Missing DB logical name property") 27 | } 28 | if (config.monitoredTables.isEmpty()) { 29 | throw ConnectException("No table or schema to be monitored specified") 30 | } 31 | } 32 | 33 | override fun taskClass(): Class<out Task> { 34 | return SourceTask::class.java 35 | } 36 | 37 | override fun taskConfigs(i: Int): List<Map<String, String>> { 38 | val configs = 39 | ArrayList<Map<String, String>>(1) 40 | configs.add(config.originalsStrings()) 41 | return configs 42 | } 43 | 44 | override fun stop() { 45 | //Nothing to do 46 | } 47 | 48 | override fun config(): ConfigDef { 49 | return SourceConnectorConfig.conf() 50 | } 51 | 52 | 53 | } -------------------------------------------------------------------------------- /src/main/kotlin/com/github/thake/logminer/kafka/connect/Offset.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect 2 | 3 | const val TYPE_KEY = "type" 4 | const val CDC_TYPE = "cdc" 5 | const val SELECT_TYPE = "select" 6 | 7 | sealed class Offset { 8 | abstract val map: Map<String, Any?> 9 | 10 | companion object { 11 | fun create(map: Map<String, Any?>): Offset?
= when (map[TYPE_KEY]) { 12 | CDC_TYPE -> OracleLogOffset(map) 13 | SELECT_TYPE -> SelectOffset(map) 14 | else -> null 15 | } 16 | } 17 | } 18 | 19 | class OracleLogOffset( 20 | override val map: Map<String, Any?> 21 | ) : Offset() { 22 | val scn: Long by map 23 | val commitScn: Long by map 24 | val isTransactionComplete: Boolean by map 25 | 26 | companion object { 27 | fun create(scn: Long, commitScn: Long, isTransactionComplete: Boolean) = 28 | OracleLogOffset( 29 | mapOf( 30 | TYPE_KEY to CDC_TYPE, 31 | "scn" to scn, 32 | "commitScn" to commitScn, 33 | "isTransactionComplete" to isTransactionComplete 34 | ) 35 | ) 36 | } 37 | override fun toString() = "OracleLogOffset(scn: $scn, commitScn: $commitScn, isTransactionComplete: $isTransactionComplete)" 38 | } 39 | 40 | class SelectOffset(override val map: Map<String, Any?>) : Offset() { 41 | val table: TableId 42 | get() = TableId(map["owner"] as String, map["table"] as String) 43 | val scn: Long by map 44 | val rowId: String by map 45 | fun toOracleLogOffset() = OracleLogOffset.create(scn, scn, false) 46 | 47 | companion object { 48 | fun create(scn: Long, table: TableId, rowId: String) = 49 | SelectOffset( 50 | mapOf( 51 | TYPE_KEY to SELECT_TYPE, 52 | "scn" to scn, 53 | "table" to table.table, 54 | "owner" to table.owner, 55 | "rowId" to rowId 56 | ) 57 | ) 58 | } 59 | } -------------------------------------------------------------------------------- /src/test/kotlin/com/github/thake/logminer/kafka/connect/BigDecimalTypeTest.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect 2 | 3 | import io.confluent.connect.avro.AvroData 4 | import io.kotest.matchers.comparables.shouldBeEqualComparingTo 5 | import io.kotest.matchers.nulls.shouldNotBeNull 6 | import io.kotest.matchers.should 7 | import io.kotest.matchers.shouldBe 8 | import io.mockk.every 9 | import io.mockk.mockk 10 | import org.apache.avro.LogicalType 11 | import org.apache.avro.LogicalTypes 12 | import org.junit.jupiter.api.Test 13 | import java.math.BigDecimal 14 | import java.sql.ResultSet 15 | import javax.xml.validation.Schema 16 | 17 | class BigDecimalTypeTest { 18 | @Test 19 | fun testCorrectScaleForString() { 20 | val type = SchemaType.NumberType.BigDecimalType(13,10) 21 | val str = "123.20" 22 | type.convert(str).should { 23 | it.scale().shouldBe(type.scale) 24 | it.shouldBeEqualComparingTo(str.toBigDecimal()) 25 | } 26 | } 27 | @Test 28 | fun testCorrectScaleForResultSet(){ 29 | val resultSet = mockk<ResultSet>() 30 | val columnIndex = 1 31 | val expectedDecimal = "234.123".toBigDecimal() 32 | every { resultSet.getBigDecimal(columnIndex) }.returns(expectedDecimal) 33 | val type = SchemaType.NumberType.BigDecimalType(ORACLE_UNQUALIFIED_NUMBER_PRECISION, 34 | ORACLE_UNQUALIFIED_NUMBER_SCALE) 35 | type.extract(columnIndex,resultSet).should { 36 | it.shouldNotBeNull() 37 | it.scale().shouldBe(type.scale) 38 | it.shouldBeEqualComparingTo(expectedDecimal) 39 | } 40 | 41 | } 42 | @Test 43 | fun testConversionToAvroSchema(){ 44 | val type = SchemaType.NumberType.BigDecimalType(ORACLE_UNQUALIFIED_NUMBER_PRECISION, 45 | ORACLE_UNQUALIFIED_NUMBER_SCALE) 46 | val schema = type.createSchemaBuilder().build() 47 | val avroData = AvroData(10) 48 | val avroSchema = avroData.fromConnectSchema(schema) 49 | avroSchema.type.shouldBe(org.apache.avro.Schema.Type.BYTES) 50 | avroSchema.logicalType.shouldBe(LogicalTypes.decimal(ORACLE_UNQUALIFIED_NUMBER_PRECISION, 51 | ORACLE_UNQUALIFIED_NUMBER_SCALE)) 52 | } 53 | }
-------------------------------------------------------------------------------- /src/test/kotlin/com/github/thake/logminer/kafka/connect/ConnectionExtensions.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect 2 | 3 | import com.github.thake.logminer.kafka.connect.logminer.LogminerSchema 4 | import java.math.BigDecimal 5 | import java.sql.Connection 6 | import java.sql.Date 7 | import java.sql.Timestamp 8 | import java.time.Instant 9 | import java.time.LocalDate 10 | 11 | 12 | fun Connection.executeUpdate(sql: String): Int { 13 | return this.prepareStatement(sql).use { it.executeUpdate() } 14 | } 15 | val Connection.currentScn 16 | get() = this.prepareStatement("Select CURRENT_SCN from v${"$"}database").use { statement -> 17 | statement.executeQuery().use { 18 | it.next() 19 | it.getLong(1) 20 | } 21 | } 22 | fun Connection.insertRow(id: Int, table: TableId) { 23 | val columnList = Columns.values().joinToString(",", "(", ")") { "\"${it.name}\"" } 24 | LOG.info { "SCN before inserting row with id $id in $table: ${this.currentScn}" } 25 | this.prepareStatement("INSERT INTO ${table.fullName} $columnList VALUES (?,?,?,?,?,?,?)").use { 26 | it.setInt(1, id) 27 | it.setTimestamp(2, Timestamp.from(Instant.now())) 28 | it.setString(3, "Test") 29 | it.setInt(4, 123456) 30 | it.setLong(5, 183456L) 31 | it.setDate(6, Date.valueOf(LocalDate.now())) 32 | it.setBigDecimal(7, BigDecimal("30.516658782958984")) 33 | val result = it.executeUpdate() 34 | if(result == 0){ 35 | throw IllegalStateException("Could not insert row with id $id") 36 | }else{ 37 | LOG.info { "Inserted new row in $table with id $id" } 38 | } 39 | } 40 | LOG.info {"SCN for inserted row with id $id in $table: ${this.getScnOfRow(id,table)}"} 41 | LOG.info { "SCN after inserting row with id $id in $table: ${this.currentScn}" } 42 | } 43 | fun Connection.getScnOfRow(id : Int, table: TableId) : Long{ 44 | return this.prepareStatement("SELECT ORA_ROWSCN FROM ${table.fullName} WHERE id = ?").use { stmt -> 45 | stmt.setInt(1,id) 46 | stmt.executeQuery().use { resultSet -> 47 | resultSet.next() 48 | resultSet.getLong(1) 49 | } 50 | } 51 | } 52 | 53 | fun Connection.insertRow(id: Int) { 54 | insertRow(id, STANDARD_TABLE) 55 | } -------------------------------------------------------------------------------- /src/test/kotlin/com/github/thake/logminer/kafka/connect/logminer/TransactionTest.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect.logminer 2 | 3 | import com.github.thake.logminer.kafka.connect.Operation 4 | import com.github.thake.logminer.kafka.connect.SchemaDefinition 5 | import com.github.thake.logminer.kafka.connect.SchemaService 6 | import com.github.thake.logminer.kafka.connect.TableId 7 | import io.mockk.every 8 | import io.mockk.mockk 9 | import io.mockk.verify 10 | import net.openhft.chronicle.queue.ChronicleQueue 11 | import org.junit.jupiter.api.Test 12 | import java.sql.Connection 13 | import java.sql.Timestamp 14 | import java.util.concurrent.CountDownLatch 15 | import kotlin.concurrent.thread 16 | 17 | class TransactionTest { 18 | val schemaService: SchemaService = mockk() 19 | val queueFactory: ((xid: String) -> ChronicleQueue) = mockk() 20 | val conn: Connection = mockk() 21 | val initialRecord: LogminerRow.Change 22 | val underTest: Transaction 23 | 24 | init { 25 | initialRecord = LogminerRow.Change( 26 | rowIdentifier = LogminerRowIdentifier(1, "1"), 
27 | transaction = "a", 28 | operation = Operation.INSERT, 29 | sqlRedo = "Blub", 30 | table = TableId.ofFullName("blub.blu"), 31 | timestamp = Timestamp(0), 32 | username = "ab" 33 | ) 34 | every { 35 | schemaService.getSchema(conn, initialRecord.table) 36 | }.returns(mockk()) 37 | 38 | underTest = Transaction(queueFactory, conn, initialRecord, schemaService) 39 | } 40 | 41 | @Test 42 | fun testParallelUpdateSchema() { 43 | val tableId = mockk() 44 | val schemaDefinition = mockk() 45 | every { 46 | schemaService.refreshSchema(conn, tableId) 47 | }.returns(schemaDefinition) 48 | 49 | val numberOfThreads = 5 50 | val countDownLatch = CountDownLatch(numberOfThreads) 51 | repeat(numberOfThreads) { 52 | thread { 53 | underTest.updateSchemaIfOutdated(tableId) 54 | countDownLatch.countDown() 55 | } 56 | } 57 | countDownLatch.await() 58 | verify(exactly = 1) { 59 | schemaService.refreshSchema(conn, tableId) 60 | } 61 | 62 | } 63 | } -------------------------------------------------------------------------------- /src/test/kotlin/com/github/thake/logminer/kafka/connect/DMLOperationsTest.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect 2 | 3 | 4 | import org.junit.jupiter.api.Assertions.assertEquals 5 | import org.junit.jupiter.params.ParameterizedTest 6 | import org.junit.jupiter.params.provider.EnumSource 7 | import org.testcontainers.junit.jupiter.Testcontainers 8 | import java.time.LocalDate 9 | import java.time.LocalDateTime 10 | import java.time.ZoneId 11 | import java.time.ZoneOffset 12 | import java.util.* 13 | 14 | @Testcontainers 15 | class DMLOperationsTest : AbstractCdcSourceIntegrationTest() { 16 | 17 | @ParameterizedTest 18 | @EnumSource 19 | fun testInsertRecord(dictionarySource: LogminerDictionarySource) { 20 | val conn = openConnection() 21 | val insertedId = 1 22 | conn.insertRow(1) 23 | val cdcSource = getCdcSource(dictionarySource) 24 | val results = cdcSource.getResults(conn) 25 | assertContainsOnlySpecificOperationForIds(results, insertedId.rangeTo(insertedId), Operation.INSERT) 26 | assertAllAfterColumnsContained(results) 27 | } 28 | 29 | @ParameterizedTest 30 | @EnumSource 31 | fun testDeleteRecord(dictionarySource: LogminerDictionarySource) { 32 | val conn = openConnection() 33 | (0 until 100).forEach { conn.insertRow(it) } 34 | //Clear results by explicitly polling them 35 | val cdcSource = getCdcSource(dictionarySource) 36 | cdcSource.getResults(conn) 37 | conn.executeUpdate("DELETE FROM ${STANDARD_TABLE.fullName} WHERE id < 50") 38 | val results = cdcSource.getResults(conn) 39 | assertContainsOnlySpecificOperationForIds(results, 0 until 50, Operation.DELETE) 40 | assertAllBeforeColumnsContained(results) 41 | } 42 | 43 | @ParameterizedTest 44 | @EnumSource 45 | fun testUpdateRecords(dictionarySource: LogminerDictionarySource) { 46 | val conn = openConnection() 47 | (0 until 100).forEach { conn.insertRow(it) } 48 | val cdcSource = getCdcSource(dictionarySource) 49 | cdcSource.getResults(conn) 50 | conn 51 | .executeUpdate("UPDATE ${STANDARD_TABLE.fullName} SET string = 'AAAA', time = TIMESTAMP '2020-01-13 15:45:01', \"date\" = DATE '2020-01-13' where id < 50") 52 | val results = cdcSource.getResults(conn) 53 | assertContainsOnlySpecificOperationForIds(results, 0 until 50, Operation.UPDATE) 54 | assertAllBeforeColumnsContained(results) 55 | results.forEach { 56 | val after = it.cdcRecord.after!! 
57 | assertEquals(3, after.size) 58 | assertEquals("AAAA", after[Columns.STRING.name]) 59 | assertEquals( 60 | Date.from(LocalDateTime.of(2020, 1, 13, 15, 45, 1).atZone(ZoneId.systemDefault()).toInstant()), 61 | after[Columns.TIME.name] 62 | ) 63 | assertEquals( 64 | Date.from(LocalDate.of(2020, 1, 13).atStartOfDay(ZoneOffset.UTC).toInstant()), 65 | after[Columns.date.name] 66 | ) 67 | } 68 | } 69 | 70 | } -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem 2 | @rem Copyright 2015 the original author or authors. 3 | @rem 4 | @rem Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem you may not use this file except in compliance with the License. 6 | @rem You may obtain a copy of the License at 7 | @rem 8 | @rem https://www.apache.org/licenses/LICENSE-2.0 9 | @rem 10 | @rem Unless required by applicable law or agreed to in writing, software 11 | @rem distributed under the License is distributed on an "AS IS" BASIS, 12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | @rem See the License for the specific language governing permissions and 14 | @rem limitations under the License. 15 | @rem 16 | 17 | @if "%DEBUG%" == "" @echo off 18 | @rem ########################################################################## 19 | @rem 20 | @rem Gradle startup script for Windows 21 | @rem 22 | @rem ########################################################################## 23 | 24 | @rem Set local scope for the variables with windows NT shell 25 | if "%OS%"=="Windows_NT" setlocal 26 | 27 | set DIRNAME=%~dp0 28 | if "%DIRNAME%" == "" set DIRNAME=. 29 | set APP_BASE_NAME=%~n0 30 | set APP_HOME=%DIRNAME% 31 | 32 | @rem Resolve any "." and ".." in APP_HOME to make it shorter. 33 | for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi 34 | 35 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 36 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" 37 | 38 | @rem Find java.exe 39 | if defined JAVA_HOME goto findJavaFromJavaHome 40 | 41 | set JAVA_EXE=java.exe 42 | %JAVA_EXE% -version >NUL 2>&1 43 | if "%ERRORLEVEL%" == "0" goto execute 44 | 45 | echo. 46 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 47 | echo. 48 | echo Please set the JAVA_HOME variable in your environment to match the 49 | echo location of your Java installation. 50 | 51 | goto fail 52 | 53 | :findJavaFromJavaHome 54 | set JAVA_HOME=%JAVA_HOME:"=% 55 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 56 | 57 | if exist "%JAVA_EXE%" goto execute 58 | 59 | echo. 60 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 61 | echo. 62 | echo Please set the JAVA_HOME variable in your environment to match the 63 | echo location of your Java installation. 64 | 65 | goto fail 66 | 67 | :execute 68 | @rem Setup the command line 69 | 70 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 71 | 72 | 73 | @rem Execute Gradle 74 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* 75 | 76 | :end 77 | @rem End local scope for the variables with windows NT shell 78 | if "%ERRORLEVEL%"=="0" goto mainEnd 79 | 80 | :fail 81 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 82 | rem the _cmd.exe /c_ return code! 
83 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 84 | exit /b 1 85 | 86 | :mainEnd 87 | if "%OS%"=="Windows_NT" endlocal 88 | 89 | :omega 90 | -------------------------------------------------------------------------------- /src/test/kotlin/com/github/thake/logminer/kafka/connect/issues/Issue8Test.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect.issues 2 | 3 | import com.github.thake.logminer.kafka.connect.AbstractIntegrationTest 4 | import com.github.thake.logminer.kafka.connect.OWNER 5 | import com.github.thake.logminer.kafka.connect.SchemaService 6 | import com.github.thake.logminer.kafka.connect.SourceDatabaseNameService 7 | import com.github.thake.logminer.kafka.connect.TableId 8 | import com.github.thake.logminer.kafka.connect.initial.SelectSource 9 | import io.kotest.matchers.collections.shouldHaveSize 10 | import io.kotest.matchers.comparables.shouldBeEqualComparingTo 11 | import io.kotest.matchers.maps.shouldHaveKeys 12 | import io.kotest.matchers.nulls.shouldNotBeNull 13 | import io.kotest.matchers.should 14 | import io.kotest.matchers.shouldBe 15 | import io.kotest.matchers.types.shouldBeInstanceOf 16 | import org.junit.jupiter.api.AfterAll 17 | import org.junit.jupiter.api.AfterEach 18 | import org.junit.jupiter.api.BeforeAll 19 | import org.junit.jupiter.api.BeforeEach 20 | import org.junit.jupiter.api.Test 21 | import org.testcontainers.junit.jupiter.Testcontainers 22 | import java.math.BigDecimal 23 | 24 | 25 | @Testcontainers 26 | class Issue8Test: AbstractIntegrationTest() { 27 | private lateinit var selectSource: SelectSource 28 | companion object{ 29 | val TEST_TABLE = TableId(OWNER, "ID_TEST_TABLE") 30 | } 31 | 32 | @BeforeEach 33 | fun setupSource() { 34 | runScripts("issues/8/idTable.sql") 35 | //Wait for table creation 36 | while (!openConnection().metaData.getTables(null, TEST_TABLE.owner, TEST_TABLE.table, null).use { 37 | it.next() 38 | }) { 39 | Thread.sleep(1000) 40 | } 41 | Thread.sleep(5000) 42 | selectSource = SelectSource(1000, listOf(TEST_TABLE), SchemaService(SourceDatabaseNameService("A"),defaultZone), null) 43 | 44 | } 45 | 46 | @AfterEach 47 | fun destroySource() { 48 | selectSource.close() 49 | runScripts("issues/8/dropIdTable.sql") 50 | } 51 | @Test 52 | fun testFullRead(){ 53 | //Insert one entry 54 | val conn = openConnection() 55 | val id = 1.toBigDecimal() 56 | val name = "myTestValue" 57 | val value = 2.toBigDecimal() 58 | 59 | conn.prepareStatement("INSERT INTO ${TEST_TABLE.fullName} VALUES (?,?,?)").use { stmt -> 60 | stmt.setBigDecimal(1,id) 61 | stmt.setString(2,name) 62 | stmt.setBigDecimal(3,value) 63 | stmt.executeUpdate() 64 | } 65 | selectSource.maybeStartQuery(conn) 66 | val result = selectSource.poll() 67 | result.shouldHaveSize(1) 68 | 69 | result[0].cdcRecord.after.should{ 70 | it.shouldNotBeNull() 71 | it.shouldHaveKeys("ID", "NAME", "VALUE") 72 | it["ID"].should{idValue -> 73 | idValue.shouldBeInstanceOf() 74 | idValue.shouldBeEqualComparingTo(id) 75 | } 76 | it["NAME"].shouldBe(name) 77 | it["VALUE"].should{ valueValue -> 78 | valueValue.shouldBeInstanceOf() 79 | valueValue.shouldBeEqualComparingTo(value) 80 | } 81 | } 82 | } 83 | 84 | } -------------------------------------------------------------------------------- /src/test/kotlin/com/github/thake/logminer/kafka/connect/LongStatementsTest.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect 2 | 3 | import 
mu.KotlinLogging 4 | import org.junit.jupiter.api.Assertions.assertEquals 5 | import org.junit.jupiter.api.Assertions.assertNotNull 6 | import org.junit.jupiter.params.ParameterizedTest 7 | import org.junit.jupiter.params.provider.EnumSource 8 | import org.testcontainers.containers.OracleContainer 9 | import org.testcontainers.junit.jupiter.Container 10 | import java.util.concurrent.CountDownLatch 11 | 12 | val LOG = KotlinLogging.logger {} 13 | 14 | class LongStatementsTest : AbstractCdcSourceIntegrationTest() { 15 | override val tableSelector: TableSelector 16 | get() = TableSelector(OWNER, "TAB_WITH_LONG_STMTS") 17 | 18 | @Container 19 | override val oracle: OracleContainer = 20 | OracleContainer("thake/oracle-xe-11g-archivelog").withInitScript("initSchemaStatement.sql").withReuse(false) 21 | 22 | @ParameterizedTest 23 | @EnumSource 24 | fun testLongStatementWrapping(dictionarySource: LogminerDictionarySource) { 25 | val columns = 500 26 | val columnSize = 255 27 | val strValue = IntRange(0, columnSize - 1).joinToString(separator = "") { "a" } 28 | val columnRange = IntRange(0, columns) 29 | val createTableStatement = """create table SIT.TAB_WITH_LONG_STMTS 30 | ( 31 | id NUMBER(8) constraint TAB_WITH_LONG_STMTS_pk primary key, 32 | """ + columnRange.joinToString(separator = ",\n") { "\tmy_long_column_name_$it VARCHAR2(${columnSize * 4} CHAR)" } + ")".trim() 33 | val insertStr = 34 | "INSERT INTO SIT.TAB_WITH_LONG_STMTS VALUES (?," + columnRange.joinToString(separator = ",") { "?" } + ")" 35 | openConnection().use { 36 | it.createStatement().use { stmt -> 37 | LOG.info { "Creating table with SQL:\n $createTableStatement" } 38 | stmt.executeUpdate(createTableStatement) 39 | } 40 | } 41 | val entries = 100 42 | val updateExecutions = 10 43 | val finished = CountDownLatch(1) 44 | val pollConnection = openConnection() 45 | val cdcSource = getCdcSource(dictionarySource) 46 | Thread { 47 | val conn = openConnection() 48 | conn.prepareStatement(insertStr).use { stmt -> 49 | for (i in 0 until entries) { 50 | stmt.setInt(1, i) 51 | columnRange.forEach { 52 | stmt.setString(it + 2, strValue) 53 | } 54 | stmt.addBatch() 55 | } 56 | stmt.executeBatch() 57 | } 58 | //Afterwards execute an update 59 | for (i in 0 until updateExecutions) { 60 | conn.prepareStatement("UPDATE SIT.TAB_WITH_LONG_STMTS SET my_long_column_name_0 = 'addeeff$i'") 61 | .use { it.executeUpdate() } 62 | LOG.info { "Updated entries #$i" } 63 | } 64 | finished.countDown() 65 | }.start() 66 | //Wait for the initialization 67 | Thread.sleep(2000) 68 | var totalReturnedResults = 0 69 | do { 70 | val results = cdcSource.getResults(pollConnection) 71 | assertNotNull(results) 72 | LOG.info { "Retrieved ${results.size} logminer results." } 73 | totalReturnedResults += results.size 74 | } while (results.isNotEmpty() || finished.count == 1L) 75 | assertEquals(totalReturnedResults, (entries * updateExecutions) + entries) 76 | } 77 | } -------------------------------------------------------------------------------- /src/main/kotlin/com/github/thake/logminer/kafka/connect/initial/TableFetcher.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect.initial 2 | 3 | import com.github.thake.logminer.kafka.connect.* 4 | import mu.KotlinLogging 5 | import java.sql.* 6 | 7 | private val logger = KotlinLogging.logger {} 8 | data class FetcherOffset( 9 | val table: TableId, 10 | val asOfScn: Long, 11 | val rowId: String? 
12 | ) 13 | 14 | class TableFetcher(val conn: Connection, val fetcherOffset: FetcherOffset, val schemaService: SchemaService) { 15 | private val stmt: PreparedStatement 16 | private val resultSet: ResultSet 17 | private val schemaDefinition: SchemaDefinition 18 | 19 | init { 20 | fun determineQuery(): String { 21 | val rowIdCondition = fetcherOffset.rowId?.let { "WHERE ROWID > '$it'" } ?: "" 22 | return "SELECT t.*, ROWID, ORA_ROWSCN FROM ${fetcherOffset.table.fullName} AS OF SCN ${fetcherOffset.asOfScn} t $rowIdCondition order by ROWID ASC" 23 | } 24 | schemaDefinition = schemaService.getSchema(conn, fetcherOffset.table) 25 | val query = determineQuery() 26 | stmt = conn.prepareStatement(query) 27 | try { 28 | logger.debug { "Fetching records with sql statement: $query" } 29 | resultSet = stmt.executeQuery() 30 | } catch (e: SQLException) { 31 | stmt.close() 32 | throw e 33 | } 34 | } 35 | 36 | fun poll(): PollResult? { 37 | return try { 38 | if (resultSet.next()) { 39 | val rowId = resultSet.getString("ROWID") 40 | val scn = resultSet.getLong("ORA_ROWSCN") 41 | val values = (1 until resultSet.metaData.columnCount - 1).map { 42 | val name = resultSet.metaData.getColumnName(it) 43 | val columnDef = schemaDefinition.getColumnSchemaType(name) 44 | ?: throw IllegalStateException("Column $name does not exist in schema definition") 45 | var value = try { 46 | columnDef.extract(it, resultSet) 47 | } catch (e: SQLException) { 48 | throw SQLException( 49 | "Couldn't convert value of column $name (table: ${fetcherOffset.table.fullName}). Expected type: $columnDef.", 50 | e 51 | ) 52 | } 53 | if (resultSet.wasNull()) { 54 | value = null 55 | } 56 | Pair(name, value) 57 | }.toMap() 58 | val cdcRecord = CdcRecord( 59 | scn = scn, 60 | username = null, 61 | timestamp = Timestamp(0), 62 | transaction = "NOT AVAILABLE", 63 | table = fetcherOffset.table, 64 | operation = Operation.READ, 65 | before = null, 66 | after = values, 67 | dataSchema = schemaDefinition, 68 | rowId = rowId 69 | ) 70 | val offset = SelectOffset.create(fetcherOffset.asOfScn, fetcherOffset.table, rowId) 71 | return PollResult(cdcRecord, offset) 72 | } else { 73 | logger.debug { "ResultSet for table ${fetcherOffset.table} has no more records." 
} 74 | null 75 | } 76 | } catch (e: SQLException) { 77 | close() 78 | throw e 79 | } 80 | } 81 | 82 | fun close() { 83 | resultSet.close() 84 | stmt.close() 85 | } 86 | } -------------------------------------------------------------------------------- /src/test/kotlin/com/github/thake/logminer/kafka/connect/ChangingTableTest.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect 2 | 3 | 4 | import com.github.thake.logminer.kafka.connect.logminer.LogminerSchema 5 | import com.github.thake.logminer.kafka.connect.logminer.LogminerSource 6 | import io.kotest.matchers.nulls.shouldBeNull 7 | import io.kotest.matchers.nulls.shouldNotBeNull 8 | import org.junit.jupiter.api.AfterAll 9 | import org.junit.jupiter.api.AfterEach 10 | import org.junit.jupiter.api.BeforeEach 11 | import org.junit.jupiter.api.TestInstance 12 | import org.junit.jupiter.params.ParameterizedTest 13 | import org.junit.jupiter.params.provider.EnumSource 14 | import org.testcontainers.containers.OracleContainer 15 | import org.testcontainers.ext.ScriptUtils 16 | import org.testcontainers.jdbc.JdbcDatabaseDelegate 17 | import org.testcontainers.junit.jupiter.Container 18 | import java.sql.Connection 19 | import java.time.Duration 20 | 21 | class ChangingTableTest : AbstractCdcSourceIntegrationTest() { 22 | 23 | private fun Connection.addOptionalColumnWithDefault(columnName: String, table: TableId = STANDARD_TABLE) { 24 | this.prepareStatement("alter table ${table.fullName} add $columnName VARCHAR2(255) default 'A'").use { 25 | it.execute() 26 | } 27 | } 28 | private fun Connection.addNullableColumn(columnName: String, table: TableId = STANDARD_TABLE){ 29 | this.prepareStatement("alter table ${table.fullName} add $columnName VARCHAR2(255)").use { 30 | it.execute() 31 | } 32 | } 33 | 34 | private fun performInsertBeforeChange(conn: Connection, cdcSource: LogminerSource){ 35 | val insertedId = 1 36 | conn.insertRow(insertedId) 37 | val results = cdcSource.getResults(conn) 38 | assertContainsOnlySpecificOperationForIds(results, insertedId.rangeTo(insertedId), Operation.INSERT) 39 | assertAllAfterColumnsContained(results) 40 | } 41 | 42 | @ParameterizedTest 43 | @EnumSource 44 | fun testAddColumnWithDefaultValue(dictionarySource: LogminerDictionarySource) { 45 | val conn = openConnection() 46 | val cdcSource = getCdcSource(dictionarySource) 47 | performInsertBeforeChange(conn, cdcSource) 48 | 49 | //Now add the column 50 | val newColumnName = "NEW_COLUMN" 51 | conn.addOptionalColumnWithDefault(newColumnName) 52 | val newInsertedId = 2 53 | conn.insertRow(newInsertedId) 54 | val resultsWithNewColumn = cdcSource.getResults(conn) 55 | assertContainsOnlySpecificOperationForIds( 56 | resultsWithNewColumn, 57 | newInsertedId.rangeTo(newInsertedId), 58 | Operation.INSERT 59 | ) 60 | assertAllAfterColumnsContained(resultsWithNewColumn, Columns.values().map { it.name }.plus(newColumnName)) 61 | resultsWithNewColumn.forEach { 62 | it.cdcRecord.dataSchema.valueSchema.field(newColumnName).shouldNotBeNull() 63 | val after = it.cdcRecord.after 64 | after.shouldNotBeNull() 65 | after[newColumnName].shouldNotBeNull() 66 | } 67 | } 68 | @ParameterizedTest 69 | @EnumSource 70 | fun testAddNullableColumn(dictionarySource: LogminerDictionarySource) { 71 | val conn = openConnection() 72 | val cdcSource = getCdcSource(dictionarySource) 73 | performInsertBeforeChange(conn, cdcSource) 74 | 75 | //Now add the column 76 | val newColumnName = "NEW_COLUMN" 77 | 
conn.addNullableColumn(newColumnName) 78 | val newInsertedId = 2 79 | conn.insertRow(newInsertedId) 80 | val resultsWithNewColumn = cdcSource.getResults(conn) 81 | assertContainsOnlySpecificOperationForIds( 82 | resultsWithNewColumn, 83 | newInsertedId.rangeTo(newInsertedId), 84 | Operation.INSERT 85 | ) 86 | assertAllAfterColumnsContained(resultsWithNewColumn, Columns.values().map { it.name }.plus(newColumnName)) 87 | resultsWithNewColumn.forEach { 88 | val after = it.cdcRecord.after 89 | after.shouldNotBeNull() 90 | after[newColumnName].shouldBeNull() 91 | it.cdcRecord.dataSchema.valueSchema.field(newColumnName).shouldNotBeNull() 92 | } 93 | } 94 | } -------------------------------------------------------------------------------- /src/main/kotlin/com/github/thake/logminer/kafka/connect/initial/SelectSource.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect.initial 2 | 3 | import com.github.thake.logminer.kafka.connect.* 4 | import mu.KotlinLogging 5 | import java.sql.Connection 6 | import java.sql.SQLException 7 | 8 | private val logger = KotlinLogging.logger {} 9 | class SelectSource( 10 | private val batchSize: Int, 11 | private val tablesToFetch: List, 12 | private val schemaService: SchemaService, 13 | var lastOffset: SelectOffset? 14 | ) : Source { 15 | 16 | var currentTableFetcher: TableFetcher? = null 17 | var continuePolling = true 18 | 19 | init { 20 | if (tablesToFetch.isEmpty()) { 21 | throw java.lang.IllegalArgumentException("List of tables to fetch is empty, can't do anything") 22 | } 23 | } 24 | 25 | override fun getOffset() = lastOffset 26 | 27 | override fun maybeStartQuery(db: Connection) { 28 | val tableFetcher = currentTableFetcher 29 | if(tableFetcher != null && tableFetcher.conn != db){ 30 | tableFetcher.close() 31 | currentTableFetcher = null 32 | } 33 | if (currentTableFetcher == null) { 34 | val offset = FetcherOffset(determineTableToFetch(), determineAsOfScn(db), lastOffset?.rowId) 35 | logger.debug { "Starting new table fetcher with offset $offset" } 36 | currentTableFetcher = TableFetcher( 37 | db, 38 | offset, 39 | schemaService = schemaService 40 | ) 41 | } 42 | } 43 | 44 | private fun determineTableToFetch(): TableId { 45 | return lastOffset?.table 46 | ?: tablesToFetch.first() 47 | 48 | } 49 | 50 | @Suppress("SqlResolve") 51 | private fun determineAsOfScn(conn: Connection): Long { 52 | return lastOffset?.scn ?: conn.prepareStatement("select CURRENT_SCN from V${'$'}DATABASE").use { stmt -> 53 | stmt.executeQuery().use { 54 | it.next() 55 | it.getLong(1) 56 | } 57 | }.also { 58 | logger.info { "Determined current scn of database as $it" } 59 | } 60 | } 61 | 62 | override fun poll(): List { 63 | try{ 64 | var fetcher = currentTableFetcher ?: throw IllegalStateException("maybeStartQuery hasn't been called") 65 | val result = mutableListOf() 66 | while (result.size < batchSize && continuePolling) { 67 | val nextRecord = fetcher.poll() 68 | if (nextRecord != null) { 69 | lastOffset = nextRecord.offset as SelectOffset 70 | result.add(nextRecord) 71 | } else { 72 | //No new records from the current table. 
Close the fetcher and check the next table 73 | fetcher.close() 74 | val newIndex = tablesToFetch.indexOf(fetcher.fetcherOffset.table) + 1 75 | if (newIndex < tablesToFetch.size) { 76 | fetcher = TableFetcher( 77 | fetcher.conn, 78 | FetcherOffset(tablesToFetch[newIndex], fetcher.fetcherOffset.asOfScn, null), 79 | schemaService 80 | ) 81 | currentTableFetcher = fetcher 82 | //Exit the loop to return the current result set if it is not empty. 83 | if (result.isNotEmpty()) { 84 | break 85 | } 86 | } else { 87 | //no more records to poll all tables polled 88 | logger.debug { "Stopping fetching from tables as fetch from table ${fetcher.fetcherOffset.table} did not provide any more results." } 89 | continuePolling = false 90 | } 91 | } 92 | } 93 | return result 94 | }catch (e : SQLException){ 95 | currentTableFetcher = null 96 | continuePolling = true 97 | throw e 98 | } 99 | } 100 | 101 | override fun close() { 102 | currentTableFetcher?.close() 103 | } 104 | } -------------------------------------------------------------------------------- /src/test/kotlin/com/github/thake/logminer/kafka/connect/SelectSourceTest.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect 2 | 3 | import com.github.thake.logminer.kafka.connect.initial.SelectSource 4 | import org.junit.jupiter.api.AfterEach 5 | import org.junit.jupiter.api.Assertions.* 6 | import org.junit.jupiter.api.BeforeEach 7 | import org.junit.jupiter.api.Test 8 | import org.testcontainers.junit.jupiter.Testcontainers 9 | 10 | @Testcontainers 11 | class SelectSourceTest : AbstractIntegrationTest() { 12 | private lateinit var selectSource: SelectSource 13 | @BeforeEach 14 | fun setupSource() { 15 | //Wait for table creation 16 | while (!openConnection().metaData.getTables(null, STANDARD_TABLE.owner, STANDARD_TABLE.table, null).use { 17 | it.next() 18 | }) { 19 | Thread.sleep(1000) 20 | } 21 | Thread.sleep(5000) 22 | selectSource = 23 | SelectSource(1000, listOf(STANDARD_TABLE, SECOND_TABLE), SchemaService(SourceDatabaseNameService("A"),defaultZone), null) 24 | 25 | } 26 | 27 | @AfterEach 28 | fun destroySource() { 29 | selectSource.close() 30 | } 31 | 32 | @Test 33 | fun checkSingleTable() { 34 | val conn = openConnection() 35 | (0 until 100).forEach { conn.insertRow(it) } 36 | selectSource.maybeStartQuery(conn) 37 | val result = selectSource.poll() 38 | assertContainsOnlySpecificOperationForIds(result, 0 until 100, Operation.READ) 39 | assertNotNull(selectSource.getOffset()) 40 | selectSource.maybeStartQuery(conn) 41 | val emptyResult = selectSource.poll() 42 | assertNotNull(emptyResult) 43 | assertTrue(emptyResult.isEmpty()) 44 | } 45 | 46 | @Test 47 | fun checkEmptySingleTable() { 48 | val conn = openConnection() 49 | selectSource.maybeStartQuery(conn) 50 | val result = selectSource.poll() 51 | assertNotNull(result) 52 | assertTrue(result.isEmpty()) 53 | assertNull(selectSource.getOffset()) 54 | } 55 | 56 | @Test 57 | fun checkMultipleTables() { 58 | val conn = openConnection() 59 | (0 until 100).forEach { conn.insertRow(it, STANDARD_TABLE) } 60 | (0 until 100).forEach { conn.insertRow(it, SECOND_TABLE) } 61 | selectSource.maybeStartQuery(conn) 62 | val result = selectSource.poll() 63 | assertContainsSpecificOperationForIds(result, 0 until 100, Operation.READ, STANDARD_TABLE) 64 | assertEquals(100, result.size) 65 | val secondResult = selectSource.poll() 66 | assertContainsSpecificOperationForIds(secondResult, 0 until 100, Operation.READ, SECOND_TABLE) 67 | 
assertEquals(100, secondResult.size) 68 | assertNotNull(selectSource.getOffset()) 69 | selectSource.maybeStartQuery(conn) 70 | val emptyResult = selectSource.poll() 71 | assertNotNull(emptyResult) 72 | assertTrue(emptyResult.isEmpty()) 73 | } 74 | 75 | @Test 76 | fun checkNoDirtyReads() { 77 | selectSource = SelectSource(10, listOf(STANDARD_TABLE), SchemaService(SourceDatabaseNameService("A"),defaultZone), null) 78 | val conn = openConnection() 79 | (0 until 100).forEach { conn.insertRow(it) } 80 | selectSource.maybeStartQuery(conn) 81 | val result = selectSource.poll() 82 | selectSource.close() 83 | val dirtyWriteTransaction = openConnection() 84 | (100 until 200).forEach { dirtyWriteTransaction.insertRow(it) } 85 | selectSource = SelectSource( 86 | 1000, 87 | listOf(STANDARD_TABLE), 88 | SchemaService(SourceDatabaseNameService("A"),defaultZone), 89 | selectSource.lastOffset 90 | ) 91 | selectSource.maybeStartQuery(openConnection()) 92 | val secondResult = selectSource.poll() 93 | val totalResult = result.toMutableList().apply { addAll(secondResult) } 94 | //Committed rows of dirtyWriteTransaction should not be included in the result set. 95 | assertContainsOnlySpecificOperationForIds(totalResult, 0 until 100, Operation.READ) 96 | assertNotNull(selectSource.getOffset()) 97 | selectSource.maybeStartQuery(conn) 98 | val emptyResult = selectSource.poll() 99 | assertNotNull(emptyResult) 100 | assertTrue(emptyResult.isEmpty()) 101 | } 102 | 103 | } -------------------------------------------------------------------------------- /src/test/kotlin/com/github/thake/logminer/kafka/connect/OffsetIntegrationTest.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect 2 | 3 | import org.junit.jupiter.api.Assertions 4 | import org.junit.jupiter.params.ParameterizedTest 5 | import org.junit.jupiter.params.provider.EnumSource 6 | import org.testcontainers.junit.jupiter.Testcontainers 7 | 8 | @Testcontainers 9 | class OffsetIntegrationTest : AbstractCdcSourceIntegrationTest() { 10 | @ParameterizedTest 11 | @EnumSource 12 | fun testConsecutiveTransactions(dictionarySource: LogminerDictionarySource) { 13 | val first = openConnection() 14 | val firstRange = 1..100 15 | firstRange.forEach { first.insertRow(it) } 16 | first.close() 17 | val cdcConn = openConnection() 18 | val cdcSource = getCdcSource(dictionarySource) 19 | val result = cdcSource.getResults(cdcConn) 20 | assertContainsOnlySpecificOperationForIds(result, firstRange, Operation.INSERT) 21 | val second = openConnection() 22 | val secondRange = 101..200 23 | secondRange.forEach { second.insertRow(it) } 24 | val secondResult = cdcSource.getResults(cdcConn) 25 | assertContainsOnlySpecificOperationForIds(secondResult, secondRange, Operation.INSERT) 26 | } 27 | @ParameterizedTest 28 | @EnumSource 29 | fun testConcurrentTransactions(dictionarySource: LogminerDictionarySource) { 30 | val longTransaction = openConnection() 31 | longTransaction.autoCommit = false 32 | val shortTransaction = openConnection() 33 | (1..100).forEach { longTransaction.insertRow(it) } 34 | shortTransaction.insertRow(101) 35 | shortTransaction.insertRow(102) 36 | //Read the first batch before committing the long transaction 37 | val queryConnection = openConnection() 38 | val cdcSource = getCdcSource(dictionarySource) 39 | val firstBatch = cdcSource.getResults(queryConnection) 40 | assertContainsOnlySpecificOperationForIds(firstBatch, 101..102, Operation.INSERT) 41 | //Now commit the long running 
transaction. 42 | longTransaction.commit() 43 | assertContainsOnlySpecificOperationForIds(cdcSource.getResults(queryConnection), 1..100, Operation.INSERT) 44 | } 45 | @ParameterizedTest 46 | @EnumSource 47 | fun testRestartConcurrentTransactions(dictionarySource: LogminerDictionarySource) { 48 | val longTransaction = openConnection() 49 | longTransaction.autoCommit = false 50 | val shortTransaction = openConnection() 51 | shortTransaction.autoCommit = true 52 | (1..100).forEach { longTransaction.insertRow(it) } 53 | shortTransaction.insertRow(101) 54 | shortTransaction.insertRow(102) 55 | //Read the first batch before committing the long transaction 56 | val queryConnection = openConnection() 57 | val cdcSource = getCdcSource(dictionarySource) 58 | val firstBatch = cdcSource.getResults(queryConnection) 59 | assertContainsOnlySpecificOperationForIds(firstBatch, 101..102, Operation.INSERT) 60 | cdcSource.close() 61 | queryConnection.close() 62 | 63 | //Now start a new CdcSource with a new connection. 64 | val newSource = createCdcSource(dictionarySource,firstBatch.last().offset as OracleLogOffset) 65 | val newQueryConnection = openConnection() 66 | Assertions.assertTrue( 67 | newSource.getResults(newQueryConnection).isEmpty(), 68 | "Old transaction records have been read twice!" 69 | ) 70 | //Now commit the long running transaction. 71 | longTransaction.commit() 72 | assertContainsOnlySpecificOperationForIds(newSource.getResults(newQueryConnection), 1..100, Operation.INSERT) 73 | } 74 | 75 | @ParameterizedTest 76 | @EnumSource 77 | fun testPolledWithinTransaction(dictionarySource: LogminerDictionarySource) { 78 | val longTransaction = openConnection() 79 | longTransaction.autoCommit = false 80 | (1..100).forEach { longTransaction.insertRow(it) } 81 | //Read the first batch before committing the long transaction 82 | val queryConnection = openConnection() 83 | val cdcSource = getCdcSource(dictionarySource) 84 | val firstBatch = cdcSource.getResults(queryConnection) 85 | assertContainsOnlySpecificOperationForIds(firstBatch, IntRange.EMPTY, Operation.INSERT) 86 | //Write the next entries 87 | (101..200).forEach { longTransaction.insertRow(it) } 88 | longTransaction.commit() 89 | assertContainsOnlySpecificOperationForIds(cdcSource.getResults(queryConnection), 1..200, Operation.INSERT) 90 | } 91 | } -------------------------------------------------------------------------------- /src/test/kotlin/com/github/thake/logminer/kafka/connect/TimezoneTest.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect 2 | 3 | import com.github.thake.logminer.kafka.connect.initial.SelectSource 4 | import com.github.thake.logminer.kafka.connect.logminer.LogminerConfiguration 5 | import com.github.thake.logminer.kafka.connect.logminer.LogminerSource 6 | import io.kotest.matchers.nulls.shouldNotBeNull 7 | import io.kotest.matchers.shouldBe 8 | import io.kotest.matchers.types.shouldBeInstanceOf 9 | import org.junit.jupiter.api.AfterEach 10 | import org.junit.jupiter.api.Assertions 11 | import org.junit.jupiter.api.BeforeEach 12 | import org.junit.jupiter.api.Test 13 | import org.testcontainers.containers.OracleContainer 14 | import org.testcontainers.junit.jupiter.Container 15 | import org.testcontainers.junit.jupiter.Testcontainers 16 | import java.math.BigDecimal 17 | import java.sql.Connection 18 | import java.sql.Date 19 | import java.sql.Timestamp 20 | import java.time.* 21 | import java.time.temporal.ChronoField 22 | import 
java.util.* 23 | 24 | val TIME_TABLE = TableId(OWNER, "TIME_TEST") 25 | @Testcontainers 26 | class TimezoneTest : AbstractIntegrationTest() { 27 | private lateinit var selectSource: SelectSource 28 | private lateinit var cdcSource: LogminerSource 29 | override fun getInitScript() = "timezoneTest.sql" 30 | 31 | fun getCdcSource(dictionarySource : LogminerDictionarySource = LogminerDictionarySource.ONLINE) : LogminerSource { 32 | cdcSource = createCdcSource(dictionarySource) 33 | return cdcSource 34 | } 35 | 36 | @AfterEach 37 | fun tearDownCdcSource() { 38 | cdcSource.stopLogminer() 39 | } 40 | 41 | private fun createCdcSource(logminerDictionarySource: LogminerDictionarySource, offset: OracleLogOffset = OracleLogOffset.create(0, 0, true)) = 42 | LogminerSource( 43 | config = LogminerConfiguration( 44 | listOf( 45 | TableSelector(TIME_TABLE.owner,TIME_TABLE.table) 46 | ), 47 | logminerDictionarySource = logminerDictionarySource 48 | ), 49 | offset = offset, 50 | schemaService = SchemaService(SourceDatabaseNameService("A"),defaultZone) 51 | ) 52 | @BeforeEach 53 | fun setupSource() { 54 | //Wait for table creation 55 | while (!openConnection().metaData.getTables(null, TIME_TABLE.owner, TIME_TABLE.table, null).use { 56 | it.next() 57 | }) { 58 | Thread.sleep(1000) 59 | } 60 | Thread.sleep(5000) 61 | selectSource = 62 | SelectSource(1000, listOf(TIME_TABLE), SchemaService(SourceDatabaseNameService("A"),defaultZone), null) 63 | } 64 | 65 | @Test 66 | fun testCorrectTimestamp(){ 67 | //Set the time zone to a timezone different than the database 68 | val writeTimezone = TimeZone.getTimeZone("US/Pacific") 69 | TimeZone.setDefault(writeTimezone) 70 | val conn = openConnection() 71 | val timestamp = Timestamp.from(Instant.now()) 72 | conn.prepareStatement("INSERT INTO ${TIME_TABLE.fullName} (id,time,time_with_time_zone,time_with_local_time_zone) VALUES (?,?,?,?)").use { 73 | it.setInt(1,1) 74 | it.setTimestamp(2, timestamp) 75 | it.setTimestamp(3, timestamp) 76 | it.setTimestamp(4, timestamp) 77 | it.execute() 78 | } 79 | checkSourceReturnsCorrectTimestamp(conn,selectSource,timestamp.time,writeTimezone) 80 | //Now check the cdcSource 81 | checkSourceReturnsCorrectTimestamp(conn,getCdcSource(),timestamp.time,writeTimezone) 82 | 83 | 84 | } 85 | private fun checkSourceReturnsCorrectTimestamp(conn : Connection,source : Source<*>, millisSinceEpoch : Long, writeTimezone : TimeZone){ 86 | source.maybeStartQuery(conn) 87 | val result = source.poll() 88 | result.size.shouldBe(1) 89 | val after = result[0].cdcRecord.after 90 | after.shouldNotBeNull() 91 | val timeValue = after["TIME"] 92 | timeValue.shouldBeInstanceOf() 93 | val expectedInstant = ZonedDateTime.ofInstant(Instant.ofEpochMilli(millisSinceEpoch),writeTimezone.toZoneId()).toLocalDateTime().atZone(defaultZone).toInstant() 94 | timeValue.time.shouldBe(expectedInstant.toEpochMilli()) 95 | 96 | val timeWithTimeZone = after["TIME_WITH_TIME_ZONE"] 97 | timeWithTimeZone.shouldBeInstanceOf() 98 | timeWithTimeZone.time.shouldBe(millisSinceEpoch) 99 | val timeWithLocalTimeZone = after["TIME_WITH_LOCAL_TIME_ZONE"] 100 | timeWithLocalTimeZone.shouldBeInstanceOf() 101 | timeWithLocalTimeZone.time.shouldBe(millisSinceEpoch) 102 | } 103 | } -------------------------------------------------------------------------------- /.github/workflows/detekt.yml: -------------------------------------------------------------------------------- 1 | # This workflow uses actions that are not certified by GitHub. 
2 | # They are provided by a third-party and are governed by 3 | # separate terms of service, privacy policy, and support 4 | # documentation. 5 | 6 | # This workflow performs a static analysis of your Kotlin source code using 7 | # Detekt. 8 | # 9 | # Scans are triggered: 10 | # 1. On every push to default and protected branches 11 | # 2. On every Pull Request targeting the default branch 12 | # 3. On a weekly schedule 13 | # 4. Manually, on demand, via the "workflow_dispatch" event 14 | # 15 | # The workflow should work with no modifications, but you might like to use a 16 | # later version of the Detekt CLI by modifing the $DETEKT_RELEASE_TAG 17 | # environment variable. 18 | name: Scan with Detekt 19 | 20 | on: 21 | # Triggers the workflow on push or pull request events but only for default and protected branches 22 | push: 23 | branches: [ master ] 24 | pull_request: 25 | branches: [ master ] 26 | schedule: 27 | - cron: '40 13 * * 2' 28 | 29 | # Allows you to run this workflow manually from the Actions tab 30 | workflow_dispatch: 31 | 32 | env: 33 | # Release tag associated with version of Detekt to be installed 34 | # SARIF support (required for this workflow) was introduced in Detekt v1.15.0 35 | DETEKT_RELEASE_TAG: v1.15.0 36 | 37 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel 38 | jobs: 39 | # This workflow contains a single job called "scan" 40 | scan: 41 | name: Scan 42 | # The type of runner that the job will run on 43 | runs-on: ubuntu-latest 44 | 45 | # Steps represent a sequence of tasks that will be executed as part of the job 46 | steps: 47 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it 48 | - uses: actions/checkout@v3 49 | 50 | # Gets the download URL associated with the $DETEKT_RELEASE_TAG 51 | - name: Get Detekt download URL 52 | id: detekt_info 53 | env: 54 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 55 | run: | 56 | gh api graphql --field tagName=$DETEKT_RELEASE_TAG --raw-field query=' 57 | query getReleaseAssetDownloadUrl($tagName: String!) 
{ 58 | repository(name: "detekt", owner: "detekt") { 59 | release(tagName: $tagName) { 60 | releaseAssets(name: "detekt", first: 1) { 61 | nodes { 62 | downloadUrl 63 | } 64 | } 65 | tagCommit { 66 | oid 67 | } 68 | } 69 | } 70 | } 71 | ' 1> gh_response.json 72 | 73 | DETEKT_RELEASE_SHA=$(jq --raw-output '.data.repository.release.releaseAssets.tagCommit.oid' gh_response.json) 74 | if [ $DETEKT_RELEASE_SHA != "37f0a1d006977512f1f216506cd695039607c3e5" ]; then 75 | echo "Release tag doesn't match expected commit SHA" 76 | exit 1 77 | fi 78 | 79 | DETEKT_DOWNLOAD_URL=$(jq --raw-output '.data.repository.release.releaseAssets.nodes[0].downloadUrl' gh_response.json) 80 | echo "::set-output name=download_url::$DETEKT_DOWNLOAD_URL" 81 | 82 | # Sets up the detekt cli 83 | - name: Setup Detekt 84 | run: | 85 | dest=$( mktemp -d ) 86 | curl --request GET \ 87 | --url ${{ steps.detekt_info.outputs.download_url }} \ 88 | --silent \ 89 | --location \ 90 | --output $dest/detekt 91 | chmod a+x $dest/detekt 92 | echo $dest >> $GITHUB_PATH 93 | 94 | # Performs static analysis using Detekt 95 | - name: Run Detekt 96 | continue-on-error: true 97 | run: | 98 | detekt --input ${{ github.workspace }} --report sarif:${{ github.workspace }}/detekt.sarif.json 99 | 100 | # Modifies the SARIF output produced by Detekt so that absolute URIs are relative 101 | # This is so we can easily map results onto their source files 102 | # This can be removed once relative URI support lands in Detekt: https://git.io/JLBbA 103 | - name: Make artifact location URIs relative 104 | continue-on-error: true 105 | run: | 106 | echo "$( 107 | jq \ 108 | --arg github_workspace ${{ github.workspace }} \ 109 | '. | ( .runs[].results[].locations[].physicalLocation.artifactLocation.uri |= if test($github_workspace) then .[($github_workspace | length | . + 1):] else . 
end )' \ 110 | ${{ github.workspace }}/detekt.sarif.json 111 | )" > ${{ github.workspace }}/detekt.sarif.json 112 | 113 | # Uploads results to GitHub repository using the upload-sarif action 114 | - uses: github/codeql-action/upload-sarif@v2 115 | with: 116 | # Path to SARIF file relative to the root of the repository 117 | sarif_file: ${{ github.workspace }}/detekt.sarif.json 118 | checkout_path: ${{ github.workspace }} 119 | -------------------------------------------------------------------------------- /src/test/kotlin/com/github/thake/logminer/kafka/connect/AbstractIntegrationTest.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect 2 | 3 | import com.github.thake.logminer.kafka.connect.logminer.LogminerSource 4 | import io.kotest.matchers.shouldBe 5 | import io.kotest.matchers.types.shouldBeInstanceOf 6 | import org.apache.kafka.connect.data.Decimal 7 | import org.apache.kafka.connect.data.Schema 8 | import org.junit.jupiter.api.Assertions.* 9 | import org.testcontainers.containers.OracleContainer 10 | import org.testcontainers.ext.ScriptUtils 11 | import org.testcontainers.jdbc.JdbcDatabaseDelegate 12 | import org.testcontainers.junit.jupiter.Container 13 | import java.math.BigDecimal 14 | import java.sql.Connection 15 | import java.sql.Date 16 | import java.sql.Timestamp 17 | import java.time.Instant 18 | import java.time.LocalDate 19 | import java.time.ZoneId 20 | 21 | enum class Columns { 22 | ID, TIME, STRING, integer, long, date, BIG_DECIMAL 23 | } 24 | 25 | const val OWNER = "SIT" 26 | const val TABLE_NAME = "TEST_TAB" 27 | val STANDARD_TABLE = TableId(OWNER, TABLE_NAME) 28 | val SECOND_TABLE = TableId(OWNER, "SECOND_TAB") 29 | 30 | abstract class AbstractIntegrationTest { 31 | protected open val defaultZone = ZoneId.of("Europe/Berlin") 32 | 33 | @Container 34 | protected open val oracle: OracleContainer = 35 | OracleContainer("thake/oracle-xe-11g-archivelog").withInitScript(getInitScript()).withReuse(false) 36 | 37 | protected open fun getInitScript() = "initTestTable.sql" 38 | fun runScripts(vararg scriptFiles : String){ 39 | scriptFiles.forEach {script -> 40 | ScriptUtils.runInitScript(JdbcDatabaseDelegate(oracle, ""), script) 41 | } 42 | } 43 | fun openConnection(): Connection = oracle.createConnection("") 44 | 45 | protected fun assertContainsOnlySpecificOperationForIds( 46 | toCheck: List, 47 | idRange: IntRange, 48 | operation: Operation, 49 | table: TableId = STANDARD_TABLE 50 | ) { 51 | assertContainsSpecificOperationForIds(toCheck, idRange, operation, table) 52 | assertEquals(idRange.count(), toCheck.size) 53 | } 54 | 55 | protected fun assertContainsSpecificOperationForIds( 56 | toCheck: List, 57 | idRange: IntRange, 58 | operation: Operation, 59 | table: TableId = STANDARD_TABLE 60 | ) { 61 | idRange.forEach { id -> 62 | //Find it in the records 63 | val record = toCheck.map { it.cdcRecord }.singleOrNull { 64 | val correctOperationAndName = it.operation == operation && table == it.table 65 | correctOperationAndName && when (operation) { 66 | Operation.READ, Operation.INSERT -> it.after != null && it.before == null && it.after!!["ID"] == id 67 | Operation.UPDATE -> it.after != null && it.before != null && it.before!!["ID"] == id 68 | Operation.DELETE -> it.after == null && it.before != null && it.before!!["ID"] == id 69 | } 70 | } 71 | assertNotNull(record, "Couldn't find a matching insert row for $id in table $table and operation $operation") 72 | 73 | } 74 | 75 | } 76 | 77 | 
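/** Test helper: starts the logminer query on the given connection if necessary and returns the records of a single poll. */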
protected fun LogminerSource.getResults(conn: Connection): List { 78 | this.maybeStartQuery(conn) 79 | return this.poll() 80 | } 81 | 82 | protected fun assertAllBeforeColumnsContained(result: List, columnNames : List = Columns.values().map { it.name }) { 83 | result.forEach { assertAllColumnsContained(it.cdcRecord.before, columnNames,it.cdcRecord.dataSchema.valueSchema) } 84 | } 85 | 86 | protected fun assertAllAfterColumnsContained(result: List, columnNames : List = Columns.values().map { it.name }) { 87 | result.forEach { assertAllColumnsContained(it.cdcRecord.after, columnNames,it.cdcRecord.dataSchema.valueSchema) } 88 | } 89 | 90 | 91 | private fun assertAllColumnsContained(valueMap: Map?, columnNames : List, expectedSchema : Schema) { 92 | assertNotNull(valueMap) 93 | val keys = valueMap!!.keys 94 | val leftOverKeys = columnNames.toMutableList().apply { removeAll(keys) } 95 | assertTrue(leftOverKeys.isEmpty(), "Some columns are missing: $leftOverKeys") 96 | assertEquals(columnNames.size, keys.size) 97 | assertValuesMatchSchema(valueMap,expectedSchema) 98 | } 99 | private fun assertValuesMatchSchema(valueMap: Map, expectedSchema: Schema){ 100 | expectedSchema.fields().forEach { 101 | val value = valueMap[it.name()] 102 | val fieldSchema = it.schema() 103 | if(fieldSchema.name() == Decimal.LOGICAL_NAME && value != null){ 104 | value.shouldBeInstanceOf() 105 | value.scale().shouldBe(fieldSchema.parameters()[Decimal.SCALE_FIELD]!!.toInt()) 106 | } 107 | } 108 | } 109 | } -------------------------------------------------------------------------------- /src/test/kotlin/com/github/thake/logminer/kafka/connect/issues/Issue14Test.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect.issues 2 | 3 | import com.github.thake.logminer.kafka.connect.* 4 | import io.kotest.matchers.collections.shouldBeEmpty 5 | import io.kotest.matchers.collections.shouldHaveSize 6 | import io.kotest.matchers.nulls.shouldBeNull 7 | import org.apache.kafka.connect.data.Struct 8 | import org.apache.kafka.connect.source.SourceRecord 9 | import org.apache.kafka.connect.source.SourceTaskContext 10 | import org.apache.kafka.connect.storage.OffsetStorageReader 11 | import org.junit.jupiter.api.AfterEach 12 | import org.junit.jupiter.api.Assertions.assertEquals 13 | import org.junit.jupiter.api.Assertions.assertTrue 14 | import org.junit.jupiter.api.BeforeEach 15 | import org.junit.jupiter.api.Test 16 | import org.slf4j.LoggerFactory 17 | import org.testcontainers.junit.jupiter.Testcontainers 18 | import java.sql.Connection 19 | import java.sql.Types 20 | import java.util.* 21 | 22 | @Testcontainers 23 | class Issue14Test : AbstractIntegrationTest() { 24 | private lateinit var sourceTask: SourceTask 25 | private lateinit var offsetManager: MockOffsetStorageReader 26 | private lateinit var defaultConfig: Map 27 | private val log = LoggerFactory.getLogger(Issue14Test::class.java) 28 | 29 | private class TestSourceTaskContext( 30 | val configs: Map, 31 | private val storageReader: OffsetStorageReader = MockOffsetStorageReader() 32 | ) : SourceTaskContext { 33 | 34 | override fun configs(): MutableMap { 35 | return this.configs.toMutableMap() 36 | } 37 | 38 | override fun offsetStorageReader(): OffsetStorageReader { 39 | return storageReader 40 | } 41 | 42 | } 43 | 44 | private class MockOffsetStorageReader : OffsetStorageReader { 45 | private var currentOffset = mutableMapOf() 46 | fun updateOffset(offset: MutableMap) { 47 | currentOffset = 
offset 48 | } 49 | 50 | override fun offsets(partitions: MutableCollection>?): MutableMap, MutableMap> { 51 | return Collections.emptyMap() 52 | } 53 | 54 | override fun offset(partition: MutableMap?): MutableMap { 55 | return currentOffset 56 | } 57 | 58 | } 59 | 60 | @BeforeEach 61 | fun setup() { 62 | defaultConfig = 63 | with(SourceConnectorConfig.Companion) { 64 | mapOf( 65 | BATCH_SIZE to "1000", 66 | DB_NAME to "test", 67 | DB_FETCH_SIZE to "10000", 68 | DB_SID to oracle.sid, 69 | DB_HOST to oracle.containerIpAddress, 70 | DB_PORT to oracle.oraclePort.toString(), 71 | DB_USERNAME to oracle.username, 72 | DB_PASSWORD to oracle.password, 73 | START_SCN to "0", 74 | MONITORED_TABLES to STANDARD_TABLE.fullName + ", " + SECOND_TABLE.fullName 75 | ) 76 | } 77 | sourceTask = SourceTask() 78 | offsetManager = MockOffsetStorageReader() 79 | sourceTask.initialize(TestSourceTaskContext(defaultConfig, offsetManager)) 80 | //Wait for tables to correctly initialize 81 | Thread.sleep(5000) 82 | } 83 | 84 | private fun createConfiguration(map: Map? = null): Map { 85 | return defaultConfig.toMutableMap().apply { map?.let { putAll(it) } } 86 | } 87 | 88 | @AfterEach 89 | fun tearDown() { 90 | sourceTask.stop() 91 | } 92 | 93 | @Test 94 | fun testUpdateColumnToNull() { 95 | sourceTask.start( 96 | createConfiguration( 97 | mapOf( 98 | SourceConnectorConfig.BATCH_SIZE to "10" 99 | ) 100 | ) 101 | ) 102 | val modifyingConnection = openConnection() 103 | //Initial state 104 | modifyingConnection.insertRow(1) 105 | var result = sourceTask.poll().toMutableList() 106 | assertTrue(result.isNotEmpty()) 107 | 108 | modifyingConnection.prepareStatement("UPDATE ${STANDARD_TABLE.fullName} SET STRING = ?").use { stmt -> 109 | stmt.setNull(1, Types.NVARCHAR) 110 | stmt.executeUpdate() 111 | } 112 | 113 | result = sourceTask.readAllSourceRecords() as MutableList 114 | assertTrue(result.size == 1) 115 | ((result[0].value() as Struct).get("after") as Struct).getString("STRING").shouldBeNull() 116 | } 117 | 118 | private fun SourceTask.readAllSourceRecords(): List { 119 | val result = mutableListOf() 120 | while (true) { 121 | val currentResult = poll() 122 | if (currentResult.isEmpty()) { 123 | break 124 | } else { 125 | result.addAll(currentResult) 126 | } 127 | } 128 | return result 129 | } 130 | } -------------------------------------------------------------------------------- /src/main/kotlin/com/github/thake/logminer/kafka/connect/logminer/TransactionConsolidator.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect.logminer 2 | 3 | import com.github.thake.logminer.kafka.connect.CdcRecord 4 | import com.github.thake.logminer.kafka.connect.OracleLogOffset 5 | import com.github.thake.logminer.kafka.connect.PollResult 6 | import com.github.thake.logminer.kafka.connect.SchemaService 7 | import com.github.thake.logminer.sql.parser.LogminerSqlParserException 8 | import mu.KotlinLogging 9 | import net.openhft.chronicle.queue.ChronicleQueue 10 | import org.apache.kafka.connect.errors.DataException 11 | import java.nio.file.Files 12 | import java.nio.file.Path 13 | import java.sql.Connection 14 | import java.util.* 15 | import java.util.stream.Collectors 16 | import kotlin.math.min 17 | 18 | 19 | class TransactionConsolidator( 20 | val schemaService: SchemaService 21 | ) { 22 | var conn : Connection? = null 23 | private var lastCommittedTransaction: Transaction? = null 24 | var lastCommitScn: Long? 
= null 25 | private val openTransactions: MutableMap = mutableMapOf() 26 | val hasOutstandingCommittedResults 27 | get() = lastCommittedTransaction?.hasMoreRecords ?: false 28 | var minOpenTransaction: Transaction? = null 29 | private val baseDir: Path = 30 | Files.createTempDirectory("kafaka-oracle-connect") 31 | private val logger = KotlinLogging.logger {} 32 | fun commit(commitRow: LogminerRow.Commit) { 33 | val recordsInTransaction = openTransactions.remove(commitRow.transaction) 34 | lastCommitScn = commitRow.rowIdentifier.scn 35 | if (recordsInTransaction != null) { 36 | refreshMinOpenScn() 37 | recordsInTransaction.commit(commitRow) 38 | lastCommittedTransaction = recordsInTransaction 39 | } 40 | } 41 | 42 | fun rollback(rollbackRow: LogminerRow.Rollback) { 43 | openTransactions.remove(rollbackRow.transaction)?.rollback() 44 | refreshMinOpenScn() 45 | } 46 | 47 | fun getOutstandingCommittedResults(batchSize: Int): List { 48 | return lastCommittedTransaction?.let { lastCommitted -> 49 | val loadedRecords = lastCommitted.readRecords(batchSize) 50 | val transactionCompleted = !lastCommitted.hasMoreRecords 51 | @Suppress("UNCHECKED_CAST") //Explicit != null check. Warning is a false positive 52 | (loadedRecords.parallelStream().map { 53 | try { 54 | val cdcRecord = convertToCdcRecord(it, lastCommitted) 55 | PollResult( 56 | cdcRecord, 57 | OracleLogOffset.create( 58 | min( 59 | it.rowIdentifier.scn, 60 | minOpenTransaction?.minScn ?: Long.MAX_VALUE 61 | ), 62 | lastCommitted.commitScn!!, 63 | transactionCompleted 64 | ) 65 | ) 66 | } catch (e: LogminerSqlParserException) { 67 | logger.error(e) { "Skipping record for table ${it.table} with row identifier ${it.rowIdentifier}. Could not parse SQL statement \"${it.sqlRedo}\"." } 68 | null 69 | } 70 | }.filter { it != null }.collect(Collectors.toList()) as List).also { 71 | if (transactionCompleted) { 72 | lastCommitted.close() 73 | lastCommittedTransaction = null 74 | } 75 | } 76 | } ?: Collections.emptyList() 77 | } 78 | 79 | private fun convertToCdcRecord(it: LogminerRow.Change, lastCommitted: Transaction): CdcRecord { 80 | return try { 81 | it.toCdcRecord(lastCommitted.transactionSchemas[it.table]!!) 82 | } catch (e: DataException) { 83 | logger.info { "Couldn't convert a logminer row to a cdc record. This may be caused by a changed schema. Schema will be refreshed and conversion will be tried again." } 84 | lastCommitted.updateSchemaIfOutdated(it.table) 85 | it.toCdcRecord(lastCommitted.transactionSchemas[it.table]!!).also { 86 | logger.info { "Conversion to cdc record was successful with refreshed schema." 
} 87 | } 88 | } 89 | } 90 | 91 | fun addChange(changeRow: LogminerRow.Change) { 92 | val existingOpenTransaction = openTransactions[changeRow.transaction] 93 | if (existingOpenTransaction != null) { 94 | existingOpenTransaction.addChange(changeRow) 95 | } else { 96 | val newTransaction = Transaction({ this.createQueue(it) }, conn!!, changeRow, schemaService) 97 | openTransactions[changeRow.transaction] = newTransaction 98 | if (minOpenTransaction == null) { 99 | minOpenTransaction = newTransaction 100 | } 101 | } 102 | } 103 | 104 | fun clear() { 105 | this.lastCommittedTransaction?.close() 106 | this.openTransactions.values.forEach { it.close() } 107 | } 108 | 109 | private fun createQueue(xid: String): ChronicleQueue { 110 | return ChronicleQueue.singleBuilder(baseDir.resolve(xid)).build() 111 | } 112 | 113 | private fun refreshMinOpenScn() { 114 | minOpenTransaction = openTransactions.values.minByOrNull { it.minScn } 115 | } 116 | } -------------------------------------------------------------------------------- /src/main/kotlin/com/github/thake/logminer/kafka/connect/SchemaService.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect 2 | 3 | import mu.KotlinLogging 4 | import org.apache.kafka.connect.data.Schema 5 | import org.apache.kafka.connect.data.SchemaBuilder 6 | import java.sql.Connection 7 | import java.time.ZoneId 8 | 9 | private val logger = KotlinLogging.logger {} 10 | 11 | data class ColumnDefinition( 12 | val name: String, 13 | val type: String, 14 | val scale: Int?, 15 | val precision: Int, 16 | val defaultValue: String?, 17 | val isNullable: Boolean, 18 | val doc: String? 19 | ) 20 | 21 | data class SchemaDefinition( 22 | val table: TableId, 23 | val valueSchema: Schema, 24 | val keySchema: Schema, 25 | private val columnTypes: Map> 26 | ) { 27 | fun getColumnSchemaType(columnName: String) = columnTypes[columnName] 28 | } 29 | 30 | class SchemaService( 31 | private val nameService: ConnectNameService, 32 | private val defaultZoneId : ZoneId 33 | ) { 34 | private val cachedSchemas: MutableMap = mutableMapOf() 35 | 36 | fun getSchema(dbConn: Connection, table: TableId) = cachedSchemas.getOrPut(table, { buildTableSchema(dbConn, table) }) 37 | fun refreshSchema(dbConn: Connection, table: TableId): SchemaDefinition { 38 | cachedSchemas.remove(table) 39 | return getSchema(dbConn, table) 40 | } 41 | 42 | private fun buildTableSchema(dbConn: Connection, table: TableId): SchemaDefinition { 43 | logger.info { "Getting dictionary details for table : $table" } 44 | 45 | val valueSchemaBuilder = 46 | SchemaBuilder.struct().name(nameService.getBeforeAfterStructName(table)) 47 | val keySchemaBuilder = SchemaBuilder.struct().name(nameService.getKeyRecordName(table)) 48 | val columnTypes = mutableMapOf>() 49 | val primaryKeys = mutableSetOf() 50 | dbConn.metaData.getPrimaryKeys(null, table.owner, table.table).use { 51 | while (it.next()) { 52 | primaryKeys.add(it.getString(4)) 53 | } 54 | } 55 | dbConn.prepareStatement( 56 | """ 57 | SELECT 58 | COLUMN_NAME, 59 | DATA_TYPE, 60 | DATA_LENGTH, 61 | DATA_PRECISION, 62 | DATA_SCALE, 63 | NULLABLE, 64 | DATA_DEFAULT, 65 | HIGH_VALUE, 66 | COMMENTS 67 | FROM SYS.ALL_TAB_COLUMNS COL LEFT JOIN SYS.ALL_COL_COMMENTS COM USING (COLUMN_NAME,OWNER,TABLE_NAME) 68 | WHERE OWNER = ? AND TABLE_NAME = ? 
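-- one row per column; COMMENTS comes from ALL_COL_COMMENTS, all other fields from ALL_TAB_COLUMNS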
69 | """ 70 | ).apply { 71 | setString(1, table.owner) 72 | setString(2, table.table) 73 | }.use { 74 | it.executeQuery().use { result -> 75 | while (result.next()) { 76 | val defaultValue = result.getString("DATA_DEFAULT") 77 | val name = result.getString("COLUMN_NAME") 78 | val precision = result.getInt("DATA_PRECISION") 79 | val scale = result.getInt("DATA_SCALE").let { scale -> 80 | if (result.wasNull() || (scale == 0 && precision == 0)) { 81 | null 82 | } else { 83 | scale 84 | } 85 | } 86 | val type = result.getString("DATA_TYPE") 87 | val doc = result.getString("COMMENTS") 88 | val nullable = result.getString("NULLABLE") == "Y" 89 | val columnDef = ColumnDefinition(name, type, scale, precision, defaultValue, nullable, doc) 90 | val schemaType = SchemaType.toSchemaType(columnDef,defaultZoneId) 91 | columnTypes[name] = schemaType 92 | val columnSchema = createColumnSchema(columnDef, schemaType) 93 | valueSchemaBuilder.field(name, columnSchema) 94 | if (primaryKeys.contains(name)) { 95 | keySchemaBuilder.field(name, columnSchema) 96 | } 97 | } 98 | } 99 | } 100 | 101 | 102 | return SchemaDefinition( 103 | table, 104 | valueSchemaBuilder.optional().build(), 105 | keySchemaBuilder.required().build(), 106 | columnTypes 107 | ) 108 | } 109 | 110 | private fun createColumnSchema(column: ColumnDefinition, schemaType: SchemaType): Schema { 111 | val builder = schemaType.createSchemaBuilder() 112 | if (column.isNullable) { 113 | builder.optional() 114 | } 115 | if (column.doc != null) { 116 | builder.doc(column.doc) 117 | } 118 | if (column.isNullable || column.defaultValue != null) { 119 | val defaultValue = if (column.defaultValue == null) { 120 | null 121 | } else { 122 | try { 123 | schemaType.convertDefaultValue(column.defaultValue) 124 | } catch (e: Exception) { 125 | logger 126 | .warn(e) { "Couldn't convert the default value for column ${column.name} from string '${column.defaultValue}'" } 127 | null 128 | } 129 | } 130 | if (column.isNullable || defaultValue != null) { 131 | builder.defaultValue(defaultValue) 132 | } 133 | } 134 | return builder.build() 135 | } 136 | } -------------------------------------------------------------------------------- /src/main/kotlin/com/github/thake/logminer/kafka/connect/logminer/LogminerRow.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect.logminer 2 | 3 | 4 | import com.github.thake.logminer.kafka.connect.* 5 | import com.github.thake.logminer.sql.parser.LogminerSqlParserUtil 6 | import com.github.thake.logminer.sql.parser.expression.* 7 | import com.github.thake.logminer.sql.parser.expression.operators.relational.EqualsTo 8 | import com.github.thake.logminer.sql.parser.expression.operators.relational.ExpressionList 9 | import com.github.thake.logminer.sql.parser.schema.Column 10 | import com.github.thake.logminer.sql.parser.statement.delete.Delete 11 | import com.github.thake.logminer.sql.parser.statement.insert.Insert 12 | import com.github.thake.logminer.sql.parser.statement.update.Update 13 | import org.apache.kafka.connect.errors.DataException 14 | import java.sql.Timestamp 15 | 16 | data class LogminerRowIdentifier( 17 | val scn: Long, 18 | val rowId: String 19 | ) 20 | 21 | sealed class LogminerRow { 22 | abstract val rowIdentifier: LogminerRowIdentifier 23 | abstract val transaction: String 24 | 25 | data class Commit( 26 | override val rowIdentifier: LogminerRowIdentifier, 27 | override val transaction: String 28 | ) : LogminerRow() 29 | 30 | data class 
Rollback( 31 | override val rowIdentifier: LogminerRowIdentifier, 32 | override val transaction: String 33 | ) : LogminerRow() 34 | 35 | data class Change( 36 | override val rowIdentifier: LogminerRowIdentifier, 37 | val timestamp: Timestamp, 38 | override val transaction: String, 39 | val username: String, 40 | val table: TableId, 41 | val sqlRedo: String, 42 | val operation: Operation, 43 | val status : Int = 0 44 | ) : LogminerRow() { 45 | private data class ChangeData(val before: Map?, val after: Map?) 46 | 47 | fun toCdcRecord(schemaDefinition: SchemaDefinition): CdcRecord { 48 | val sqlData = parseSql(schemaDefinition, sqlRedo) 49 | return CdcRecord( 50 | rowIdentifier.scn, 51 | rowIdentifier.rowId, 52 | table, 53 | timestamp, 54 | operation, 55 | transaction, 56 | username, 57 | schemaDefinition, 58 | sqlData.before, 59 | sqlData.after 60 | ) 61 | } 62 | 63 | private fun parseSql( 64 | schemaDefinition: SchemaDefinition, 65 | sqlRedo: String 66 | ): ChangeData { 67 | val stmt = LogminerSqlParserUtil.parse(sqlRedo) 68 | val dataMap = when (stmt) { 69 | is Insert -> { 70 | stmt.columns.map { extractStringRepresentation(it)!! } 71 | .zip((stmt.itemsList as ExpressionList).expressions.map { extractStringRepresentation(it) }) 72 | .toMap() 73 | } 74 | is Update -> { 75 | stmt.columns.map { extractStringRepresentation(it)!! } 76 | .zip(stmt.expressions.map { extractStringRepresentation(it) }).toMap() 77 | } 78 | else -> null 79 | } 80 | val beforeDataMap = when (stmt) { 81 | is Update -> { 82 | WhereVisitor() 83 | .apply { stmt.where.accept(this) }.before 84 | } 85 | is Delete -> { 86 | WhereVisitor() 87 | .apply { stmt.where.accept(this) }.before 88 | } 89 | else -> null 90 | } 91 | //Return converted values 92 | return ChangeData( 93 | beforeDataMap?.convertToSchemaTypes(schemaDefinition), 94 | dataMap?.convertToSchemaTypes(schemaDefinition) 95 | ) 96 | } 97 | 98 | 99 | private fun Map.convertToSchemaTypes(schemaDefinition: SchemaDefinition): Map { 100 | fun doConvert(schema: SchemaDefinition = schemaDefinition): Map { 101 | val map = HashMap() 102 | for ((key, value) in this) { 103 | val columnSchema = schema.getColumnSchemaType(key)?: throw DataException("Column $key does not exist in schema.") 104 | val convertedValue = value?.let { 105 | convertToSchemaType( 106 | it, 107 | columnSchema 108 | ) 109 | } 110 | map[key] = convertedValue 111 | } 112 | return map 113 | } 114 | return doConvert() 115 | } 116 | 117 | private fun convertToSchemaType(value: String, schemaType: SchemaType) = 118 | if (value == LogminerSchema.NULL_VALUE) null else schemaType.convert(value) 119 | 120 | private class WhereVisitor : ExpressionVisitorAdapter() { 121 | val before = mutableMapOf() 122 | override fun visit(expr: EqualsTo) { 123 | val columnName = extractStringRepresentation(expr.leftExpression) 124 | if (columnName != null) { 125 | before[columnName] = extractStringRepresentation(expr.rightExpression) 126 | } 127 | } 128 | } 129 | } 130 | 131 | } 132 | 133 | private fun extractStringRepresentation(expr: Expression): String? 
{ 134 | return when (expr) { 135 | is NullValue -> null 136 | is DateTimeLiteralExpression -> expr.value 137 | is StringValue -> expr.value 138 | is Column -> expr.columnName.removeSurrounding("\"") 139 | else -> expr.toString() 140 | } 141 | } -------------------------------------------------------------------------------- /src/main/kotlin/com/github/thake/logminer/kafka/connect/logminer/LogminerSource.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect.logminer 2 | 3 | 4 | import com.github.thake.logminer.kafka.connect.* 5 | import mu.KotlinLogging 6 | import java.sql.Connection 7 | 8 | private val logger = KotlinLogging.logger {} 9 | 10 | 11 | data class LogminerConfiguration( 12 | val logMinerSelectors: List, 13 | val logminerDictionarySource : LogminerDictionarySource, 14 | val batchSize: Int = 1000, 15 | val fetchSize: Int = batchSize 16 | ) 17 | 18 | class LogminerSource( 19 | private val config: LogminerConfiguration, 20 | schemaService: SchemaService, 21 | offset: OracleLogOffset 22 | ) : Source { 23 | private var currentState: QueryStartedState? = null 24 | private var lastOffset: FetcherOffset = 25 | FetcherOffset(offset.scn, null, offset.commitScn, offset.isTransactionComplete) 26 | private var transactionConsolidator: TransactionConsolidator = TransactionConsolidator(schemaService) 27 | 28 | override fun getOffset() = (currentState?.readLogOffset ?: lastOffset).toOffset() 29 | 30 | override fun maybeStartQuery(db: Connection) { 31 | val state = currentState 32 | 33 | if (state != null && state.needsRestart(db)) { 34 | //Now reset the current fetcher if it is not valid for the db or it has no more results to deliver. 35 | lastOffset = state.readLogOffset 36 | state.close() 37 | currentState = null 38 | } 39 | //Now set the state to started with a new Fetcher 40 | if (currentState == null) { 41 | val fetcher = LogminerFetcher(db, lastOffset, config) 42 | transactionConsolidator.conn = db 43 | currentState = QueryStartedState(fetcher, config.batchSize,transactionConsolidator , lastOffset) 44 | } 45 | } 46 | override fun poll(): List { 47 | return currentState.let { 48 | if (it == null) { 49 | throw IllegalStateException("Query has not been initialized") 50 | } 51 | it.poll() 52 | } 53 | } 54 | fun stopLogminer() { 55 | currentState?.fetcher?.endLogminerSession() 56 | } 57 | override fun close() { 58 | currentState?.close() 59 | currentState = null 60 | transactionConsolidator.clear() 61 | } 62 | 63 | private class QueryStartedState( 64 | val fetcher: LogminerFetcher, 65 | val batchSize: Int, 66 | val transactionConsolidator: TransactionConsolidator, 67 | var readLogOffset: FetcherOffset 68 | ) { 69 | private var forcedRestart = false 70 | fun needsRestart(conn : Connection) : Boolean { 71 | return forcedRestart || !isValidFor(conn) || fetcher.hasReachedEnd 72 | } 73 | 74 | fun isValidFor(conn: Connection) = fetcher.conn == conn 75 | 76 | fun poll(): List { 77 | val result = mutableListOf() 78 | 79 | logger.debug { "Fetching records. Batch size is $batchSize" } 80 | var continueLoop = true 81 | while (continueLoop) { 82 | result.addAll(transactionConsolidator.getOutstandingCommittedResults(batchSize)) 83 | //Only fetch new rows from database if we haven't reached the batch size. 
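//Committed-but-unread records are always drained first; the fetcher only advances while the current batch still has room.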
84 | continueLoop = if (result.size < batchSize) { 85 | val hadMoreRows = processNextLogminerRow() 86 | hadMoreRows || transactionConsolidator.hasOutstandingCommittedResults 87 | } else { 88 | false 89 | } 90 | } 91 | 92 | logger.debug { "New fetcher offset: $readLogOffset" } 93 | return result 94 | } 95 | 96 | private fun processNextLogminerRow(): Boolean { 97 | var nextRow : LogminerRow = fetcher.nextRow() ?: return false 98 | while(nextRow is LogminerRow.Change && nextRow.status == 2){ 99 | val change = nextRow 100 | logger.warn { """ 101 | Fetched a not readable row from logminer. This most probably indicates that a DDL statement has been executed which adds a new column with a not null default value. 102 | Oracle internally first updates all existing rows with the default value and only afterwards executes the alter statement. Thus logminer has no information of the new column when reading the update statements. 103 | 104 | If this is the case, consider changing the way a not null column with default value will be added. Instead of doing everything in one command, one could separate it into the following steps: 105 | 1. Adding new nullable column 106 | 2. Adding a trigger on insert that automatically inserts the default value for the new nullable column if it is not specified. 107 | 3. Updating the column with the default value for all existing rows 108 | 4. Changing the definition of the column to be NOT NULL with the default value. 109 | 5. Dropping trigger on insert 110 | Performing the DDL in this way would guarantee that the change log will be readable by logminer. 111 | 112 | Skipped not readable SQL: ${change.sqlRedo}""".trimIndent() 113 | } 114 | nextRow = fetcher.nextRow() ?: return false 115 | } 116 | 117 | when (nextRow) { 118 | is LogminerRow.Commit -> transactionConsolidator.commit(nextRow) 119 | is LogminerRow.Rollback -> transactionConsolidator.rollback(nextRow) 120 | is LogminerRow.Change -> transactionConsolidator.addChange(nextRow) 121 | } 122 | readLogOffset = 123 | FetcherOffset( 124 | nextRow.rowIdentifier, 125 | transactionConsolidator.lastCommitScn ?: readLogOffset.commitScn, 126 | true 127 | ) 128 | return true 129 | } 130 | 131 | 132 | fun close() { 133 | fetcher.close() 134 | } 135 | } 136 | 137 | 138 | } 139 | -------------------------------------------------------------------------------- /src/main/kotlin/com/github/thake/logminer/kafka/connect/ConnectSchemaFactory.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect 2 | 3 | import com.github.thake.logminer.kafka.connect.SourceRecordFields.sourceSchema 4 | import mu.KotlinLogging 5 | import org.apache.kafka.connect.data.Schema 6 | import org.apache.kafka.connect.data.SchemaBuilder 7 | import org.apache.kafka.connect.data.Struct 8 | import org.apache.kafka.connect.data.Timestamp 9 | import org.apache.kafka.connect.source.SourceRecord 10 | 11 | private val logger = KotlinLogging.logger {} 12 | 13 | object CdcRecordFields { 14 | 15 | const val OPERATION = "op" 16 | const val SOURCE = "source" 17 | const val PUBLISH_TIMESTAMP = "ts_ms" 18 | const val BEFORE = "before" 19 | const val AFTER = "after" 20 | } 21 | 22 | object SourceRecordFields { 23 | private const val VERSION = "version" 24 | private const val CONNECTOR = "connector" 25 | private const val RECORD_TIMESTAMP = "ts_ms" 26 | private const val TRANSACTION = "txId" 27 | private const val SCN = "scn" 28 | private const val OWNER = "schema" 29 | private const val TABLE = 
"table" 30 | private const val CHANGE_USER = "user" 31 | val sourceSchema: Schema = 32 | SchemaBuilder.struct().name(LogminerSourceConnector::class.java.`package`.name + ".Source") 33 | .field(VERSION, Schema.STRING_SCHEMA) 34 | .field(CONNECTOR, Schema.STRING_SCHEMA) 35 | .field(RECORD_TIMESTAMP, Timestamp.SCHEMA) 36 | .field(TRANSACTION, Schema.OPTIONAL_STRING_SCHEMA) 37 | .field(SCN, Schema.INT64_SCHEMA) 38 | .field(OWNER, Schema.STRING_SCHEMA) 39 | .field(TABLE, Schema.STRING_SCHEMA) 40 | .field(CHANGE_USER, Schema.OPTIONAL_STRING_SCHEMA) 41 | .build() 42 | 43 | fun convert(cdcRecord: CdcRecord): Struct { 44 | return Struct(sourceSchema) 45 | .put(VERSION, LogminerSourceConnector.version) 46 | .put(CONNECTOR, LogminerSourceConnector.name) 47 | .put(RECORD_TIMESTAMP, cdcRecord.timestamp) 48 | .put(TRANSACTION, cdcRecord.transaction) 49 | .put(SCN, cdcRecord.scn) 50 | .put(OWNER, cdcRecord.table.owner) 51 | .put(TABLE, cdcRecord.table.table) 52 | .put(CHANGE_USER, cdcRecord.username) 53 | } 54 | } 55 | 56 | class ConnectSchemaFactory( 57 | private val nameService: ConnectNameService, 58 | private val isEmittingTombstones: Boolean 59 | ) { 60 | 61 | 62 | private fun createKeyStruct(cdcRecord: CdcRecord): Struct { 63 | val schema = cdcRecord.dataSchema.keySchema 64 | val struct = Struct(schema) 65 | val sourceMap = when (cdcRecord.operation) { 66 | Operation.READ, Operation.INSERT -> 67 | cdcRecord.after 68 | Operation.DELETE, Operation.UPDATE -> cdcRecord.before 69 | }!! 70 | schema.fields().forEach { 71 | struct.put(it.name(), sourceMap[it.name()]) 72 | } 73 | return struct 74 | } 75 | 76 | private fun createValue(record: CdcRecord): Pair { 77 | val name = nameService.getValueRecordName(record.table) 78 | val recordConnectSchema = record.dataSchema.valueSchema 79 | 80 | val valueSchema = SchemaBuilder.struct() 81 | .name(name) 82 | .field(CdcRecordFields.OPERATION, Schema.STRING_SCHEMA) 83 | .field(CdcRecordFields.BEFORE, recordConnectSchema) 84 | .field(CdcRecordFields.AFTER, recordConnectSchema) 85 | .field(CdcRecordFields.SOURCE, sourceSchema) 86 | .field(CdcRecordFields.PUBLISH_TIMESTAMP, Timestamp.SCHEMA) 87 | .optional() 88 | .build() 89 | val struct = with(record) { 90 | var updatedAfter = after 91 | 92 | val sourceStruct = SourceRecordFields.convert(record) 93 | val recordStruct = Struct(valueSchema) 94 | .put(CdcRecordFields.OPERATION, operation.stringRep) 95 | .put(CdcRecordFields.SOURCE, sourceStruct) 96 | .put(CdcRecordFields.PUBLISH_TIMESTAMP, java.util.Date()) 97 | if (operation == Operation.UPDATE && updatedAfter != null && before != null) { 98 | val originalAfter = updatedAfter 99 | //Enrich the after state with values from the before data set 100 | val enrichedAfter = originalAfter.toMutableMap() 101 | //Set the after value with the before value if the key does not exist in the after map. 
102 | enrichedAfter.putAll(before.filter { it.key !in originalAfter }) 103 | updatedAfter = enrichedAfter 104 | } 105 | before?.let { 106 | recordStruct.put( 107 | CdcRecordFields.BEFORE, 108 | convertDataToStruct(recordConnectSchema, it) 109 | ) 110 | } 111 | updatedAfter?.let { 112 | recordStruct.put( 113 | CdcRecordFields.AFTER, 114 | convertDataToStruct(recordConnectSchema, it) 115 | ) 116 | } 117 | recordStruct 118 | } 119 | return Pair(valueSchema, struct) 120 | } 121 | 122 | fun convertToSourceRecords(pollResult: PollResult, partition: Map): List { 123 | 124 | val record = pollResult.cdcRecord 125 | val topic = nameService.getTopicName(record.table) 126 | 127 | val value = createValue(record) 128 | val keyStruct = createKeyStruct(record) 129 | val normalSourceRecord = SourceRecord( 130 | partition, 131 | pollResult.offset.map, 132 | topic, 133 | record.dataSchema.keySchema, 134 | keyStruct, 135 | value.first, 136 | value.second 137 | ) 138 | return if (isEmittingTombstones && pollResult.cdcRecord.operation == Operation.DELETE) { 139 | val deleteRecord = SourceRecord( 140 | partition, 141 | pollResult.offset.map, 142 | topic, 143 | record.dataSchema.keySchema, 144 | keyStruct, 145 | null, 146 | null 147 | ) 148 | listOf( 149 | normalSourceRecord, 150 | deleteRecord 151 | ) 152 | } else { 153 | listOf(normalSourceRecord) 154 | } 155 | 156 | } 157 | 158 | private fun convertDataToStruct(dataSchema: Schema, values: Map): Struct { 159 | return Struct(dataSchema).apply { 160 | values.keys.forEach { this.put(it, values.get(it)) } 161 | } 162 | } 163 | } -------------------------------------------------------------------------------- /src/main/kotlin/com/github/thake/logminer/kafka/connect/logminer/LogminerFetcher.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect.logminer 2 | 3 | 4 | import com.github.thake.logminer.kafka.connect.* 5 | import mu.KotlinLogging 6 | import java.sql.Connection 7 | import java.sql.PreparedStatement 8 | import java.sql.ResultSet 9 | import java.sql.Timestamp 10 | 11 | 12 | private val logger = KotlinLogging.logger {} 13 | 14 | 15 | 16 | /** 17 | * A specific runtime offset for the fetcher. 18 | */ 19 | data class FetcherOffset( 20 | val lastScn: Long, 21 | val lastRowId: String?, 22 | val commitScn: Long, 23 | val transactionCompleted: Boolean 24 | ) { 25 | constructor(lastRow: LogminerRowIdentifier, commitScn: Long, transactionCompleted: Boolean) : this( 26 | lastRow.scn, 27 | lastRow.rowId, 28 | commitScn, 29 | transactionCompleted 30 | ) 31 | 32 | //If the transaction in commitScn hasn't been completely returned, we need 33 | //to include the rows of the transaction. 
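//Illustrative example: with commitScn = 100, a completed transaction moves the lowest commit SCN that still has to be read to 101, while an incomplete one keeps it at 100 so the remaining rows of that transaction are included.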
34 | val lowestCommitScn = if (transactionCompleted) commitScn + 1 else commitScn 35 | val lowestChangeScn = if (transactionCompleted && commitScn == lastScn) lastScn + 1 else lastScn 36 | 37 | fun toOffset(): OracleLogOffset { 38 | return OracleLogOffset 39 | .create(lastScn, commitScn, transactionCompleted) 40 | } 41 | } 42 | 43 | class LogminerFetcher( 44 | val conn: Connection, 45 | private val initialOffset: FetcherOffset, 46 | config: LogminerConfiguration 47 | ) : 48 | AutoCloseable { 49 | 50 | val logminerSession : LogminerSession = LogminerSession.initSession(conn,initialOffset,config) 51 | private var needToSkipToOffsetStart = initialOffset.lastScn == initialOffset.lowestChangeScn 52 | private val resultSet: ResultSet = logminerSession.openResultSet() 53 | val hasReachedEnd: Boolean 54 | get() = resultSet.isClosed || resultSet.isAfterLast 55 | 56 | fun nextRow(): LogminerRow? { 57 | var loadedRow: LogminerRow? = null 58 | var firstRun = true 59 | while (loadedRow == null && resultSet.next()) { 60 | //First check if we need to skip rows because we haven't found the initial offset 61 | if (this.needToSkipToOffsetStart) { 62 | val skip = skipNeeded(firstRun) 63 | firstRun = false 64 | if (skip) { 65 | continue 66 | } 67 | } 68 | //Now do the real extracing 69 | loadedRow = extractRow() 70 | } 71 | return loadedRow 72 | } 73 | 74 | private fun skipNeeded(firstRun: Boolean): Boolean { 75 | val lastOpenRow = initialOffset.lastRowId ?: return false 76 | val lastScn = initialOffset.lastScn 77 | var skip = false 78 | val scn = resultSet.getLong(LogminerSchema.Fields.SCN) 79 | if (scn == lastScn) { 80 | val rowId = resultSet.getString(LogminerSchema.Fields.ROW_ID) 81 | if (rowId == lastOpenRow) { 82 | //We found the last read row id, we skip until we find a row that has CSF not set 83 | var nextRowBelongsToRowId = resultSet.getBoolean(LogminerSchema.Fields.CSF) 84 | while (nextRowBelongsToRowId && resultSet.next()) { 85 | nextRowBelongsToRowId = resultSet.getBoolean(LogminerSchema.Fields.CSF) 86 | } 87 | logger.debug { "Skipped all rows until row with ID '$rowId'(including) in order to correctly set offset." } 88 | needToSkipToOffsetStart = false 89 | } 90 | //We skip this row 91 | skip = true 92 | } else if (!firstRun) { 93 | throw IllegalStateException("Couldn't find the rowId $lastOpenRow in the logs for the scn $scn. The offset seems to be wrong.") 94 | } else { 95 | logger 96 | .warn { "Logminer result does not start with expected SCN. The archivelog containing the SCN seems to be deleted. The collected data has a gap between ${initialOffset.lastScn} (expected) and $scn (actual lowest SCN in logs). All changes happening between those SCNs have not been processed and may be lost." } 97 | needToSkipToOffsetStart = false 98 | } 99 | 100 | return skip 101 | } 102 | 103 | private fun extractRow(): LogminerRow? 
{ 104 | val operationStr = resultSet.getString(LogminerSchema.Fields.OPERATION) 105 | val xid = resultSet.getString(LogminerSchema.Fields.XID) 106 | val rowIdentifier = LogminerRowIdentifier( 107 | resultSet.getLong(LogminerSchema.Fields.SCN), 108 | resultSet.getString(LogminerSchema.Fields.ROW_ID) 109 | ) 110 | return when (operationStr) { 111 | "COMMIT" -> LogminerRow.Commit(rowIdentifier, xid) 112 | "ROLLBACK" -> LogminerRow.Rollback(rowIdentifier, xid) 113 | else -> { 114 | extractChange(operationStr, rowIdentifier, xid) 115 | } 116 | } 117 | } 118 | 119 | private fun extractChange( 120 | operationStr: String, 121 | rowIdentifier: LogminerRowIdentifier, 122 | xid: String 123 | ): LogminerRow.Change? { 124 | val operation = Operation.valueOf(operationStr) 125 | val table = 126 | TableId( 127 | resultSet.getString(LogminerSchema.Fields.SEG_OWNER), 128 | resultSet.getString(LogminerSchema.Fields.TABLE_NAME) 129 | ) 130 | val sqlRedo: String = getRedoSql(resultSet) 131 | 132 | val timeStamp: Timestamp = resultSet.getTimestamp(LogminerSchema.Fields.TIMESTAMP) 133 | val username = resultSet.getString(LogminerSchema.Fields.USERNAME) 134 | val status = resultSet.getInt(LogminerSchema.Fields.STATUS) 135 | return if (sqlRedo.contains(LogminerSchema.TEMPORARY_TABLE)) { 136 | null 137 | } else { 138 | LogminerRow 139 | .Change(rowIdentifier, timeStamp, xid, username, table, sqlRedo, operation, status) 140 | } 141 | } 142 | 143 | 144 | private fun getRedoSql(resultSet: ResultSet): String { 145 | var sqlRedo: String = resultSet.getString(LogminerSchema.Fields.SQL_REDO) 146 | var contSF: Boolean = resultSet.getBoolean(LogminerSchema.Fields.CSF) 147 | while (contSF) { 148 | resultSet.next() 149 | sqlRedo += resultSet.getString(LogminerSchema.Fields.SQL_REDO) 150 | contSF = resultSet.getBoolean(LogminerSchema.Fields.CSF) 151 | } 152 | return sqlRedo 153 | } 154 | fun endLogminerSession() { 155 | logger.info { "Ending logminer session" } 156 | close() 157 | logminerSession.endSession() 158 | } 159 | override fun close() { 160 | logger.debug { "Closing fetcher of CDC records." 
} 161 | if (!resultSet.isClosed) resultSet.close() 162 | logminerSession.close() 163 | } 164 | 165 | } 166 | 167 | -------------------------------------------------------------------------------- /src/test/kotlin/com/github/thake/logminer/kafka/connect/SchemaServiceTest.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect 2 | 3 | import com.github.thake.logminer.kafka.connect.SchemaType.NumberType.* 4 | import com.github.thake.logminer.kafka.connect.SchemaType.StringType 5 | import com.github.thake.logminer.kafka.connect.SchemaType.TimeType.DateType 6 | import com.github.thake.logminer.kafka.connect.SchemaType.TimeType.TimestampType 7 | import io.kotest.core.spec.IsolationMode 8 | import io.kotest.core.spec.Spec 9 | import io.kotest.core.spec.style.WordSpec 10 | import io.kotest.core.test.TestCase 11 | import io.kotest.core.test.TestResult 12 | import io.kotest.matchers.nulls.shouldNotBeNull 13 | import io.kotest.matchers.shouldBe 14 | import org.testcontainers.containers.OracleContainer 15 | import java.math.BigDecimal 16 | import java.sql.Connection 17 | import java.sql.DriverManager 18 | import java.sql.SQLException 19 | import java.sql.Timestamp 20 | import java.time.LocalDate 21 | import java.time.LocalDateTime 22 | import java.time.ZoneId 23 | import java.time.ZoneOffset 24 | 25 | class SchemaServiceTest : WordSpec() { 26 | 27 | val oracle: OracleContainer = 28 | OracleContainer("thake/oracle-xe-11g-archivelog").withInitScript("initTestTable.sql").withReuse(false) 29 | var table = TableId("SIT", "MY_SCHEMA_TEST_TABLE") 30 | val columnName = "A" 31 | lateinit var connection: Connection 32 | lateinit var schemaService: SchemaService 33 | private val zoneId = ZoneId.of("Europe/Berlin") 34 | private fun createTable(columnDef: String, comment: String?) { 35 | connection.prepareCall("CREATE TABLE ${table.fullName} ($columnName $columnDef)").use { it.execute() } 36 | if (comment != null) { 37 | connection.prepareCall("COMMENT ON COLUMN ${table.fullName}.$columnName IS '$comment'").use { it.execute() } 38 | } 39 | 40 | } 41 | 42 | override fun isolationMode(): IsolationMode? = IsolationMode.SingleInstance 43 | override fun beforeTest(testCase: TestCase) { 44 | connection = DriverManager.getConnection(oracle.jdbcUrl) 45 | connection.prepareCall("DROP TABLE ${table.fullName}").use { 46 | try { 47 | it.execute() 48 | } catch (e: SQLException) { 49 | //Ignore exception 50 | } 51 | } 52 | schemaService = SchemaService(SourceDatabaseNameService("test"), zoneId) 53 | table = TableId(table.owner, "MY_${testCase.source.lineNumber}") 54 | 55 | } 56 | 57 | override fun beforeSpec(spec: Spec) { 58 | oracle.start() 59 | } 60 | 61 | override fun afterSpec(spec: Spec) { 62 | oracle.stop() 63 | } 64 | 65 | override fun afterTest(testCase: TestCase, result: TestResult) { 66 | connection.close() 67 | } 68 | 69 | fun String.shouldBe( 70 | schemaType: SchemaType, 71 | nullable: Boolean = true, 72 | defaultValue: T? = null, 73 | comment: String? 
= null 74 | ) { 75 | createTable(this, comment) 76 | val schemaDef = schemaService.getSchema(connection, table) 77 | schemaDef.shouldNotBeNull() 78 | schemaDef.table.shouldBe(table) 79 | val columnDef = schemaDef.getColumnSchemaType(columnName) 80 | columnDef.shouldBe(schemaType) 81 | val field = schemaDef.valueSchema.field(columnName) 82 | field.shouldNotBeNull() 83 | val schema = field.schema() 84 | schema.isOptional.shouldBe(nullable) 85 | schema.defaultValue().shouldBe(defaultValue) 86 | schema.doc().shouldBe(comment) 87 | } 88 | 89 | init { 90 | "test correct types" should { 91 | "byte" { 92 | "NUMBER(2,0)".shouldBe(ByteType) 93 | } 94 | "short" { 95 | "NUMBER(3,0)".shouldBe(ShortType) 96 | } 97 | "shortWithoutPrecision" { 98 | "NUMBER(4)".shouldBe(ShortType) 99 | } 100 | "int"{ 101 | "NUMBER(5,0)".shouldBe(IntType) 102 | } 103 | "long"{ 104 | "NUMBER(10,0)".shouldBe(LongType) 105 | } 106 | "BigDecimal"{ 107 | "NUMBER(20,0)".shouldBe(BigDecimalType(20,0)) 108 | } 109 | "undefined NUMBER"{ 110 | "NUMBER".shouldBe(BigDecimalType(ORACLE_UNQUALIFIED_NUMBER_PRECISION, ORACLE_UNQUALIFIED_NUMBER_SCALE)) 111 | } 112 | "Date"{ 113 | "DATE".shouldBe(DateType) 114 | } 115 | "Timestamp"{ 116 | "TIMESTAMP".shouldBe(TimestampType.TimestampWithoutTimezone(zoneId)) 117 | } 118 | "Timestamp with timezone"{ 119 | "TIMESTAMP WITH TIME ZONE".shouldBe(TimestampType.TimestampWithTimezone()) 120 | } 121 | "Timestamp with local timezone"{ 122 | "TIMESTAMP WITH LOCAL TIME ZONE".shouldBe(TimestampType.TimestampWithLocalTimezone()) 123 | } 124 | "byteDefault" { 125 | "NUMBER(2,0) default 1".shouldBe(ByteType, true, 1.toByte()) 126 | } 127 | "shortDefault" { 128 | "NUMBER(3,0) default 1".shouldBe(ShortType, true, 1.toShort()) 129 | } 130 | "intDefault"{ 131 | "NUMBER(5,0) default 1".shouldBe(IntType, true, 1) 132 | } 133 | "longDefault"{ 134 | "NUMBER(10,0) default 1".shouldBe(LongType, true, 1L) 135 | } 136 | "long default with space"{ 137 | "NUMBER(10,0) default 1 ".shouldBe(LongType, true, 1L) 138 | } 139 | "BigDecimalDefault"{ 140 | "NUMBER(20,0) default 1".shouldBe(BigDecimalType(20,0), true, BigDecimal.ONE) 141 | } 142 | "date small"{ 143 | "date".shouldBe(DateType) 144 | } 145 | "DateDefault"{ 146 | "DATE default DATE '2018-04-12'".shouldBe( 147 | DateType, 148 | true, 149 | java.util.Date.from(LocalDate.of(2018, 4, 12).atStartOfDay(ZoneOffset.UTC).toInstant()) 150 | ) 151 | } 152 | "TimestampDefault"{ 153 | "TIMESTAMP default TIMESTAMP '2018-04-12 01:00:00'".shouldBe( 154 | TimestampType.TimestampWithoutTimezone(zoneId), true, Timestamp.valueOf( 155 | LocalDateTime.of(2018, 4, 12, 1, 0, 0) 156 | ) 157 | ) 158 | } 159 | 160 | "TimestampCurrentTimestampDefault"{ 161 | "TIMESTAMP default current_timestamp".shouldBe(TimestampType.TimestampWithoutTimezone(zoneId), true) 162 | } 163 | "Timestamp sysdate default with space"{ 164 | "TIMESTAMP default sysdate ".shouldBe(TimestampType.TimestampWithoutTimezone(zoneId), true) 165 | } 166 | "Timestamp with 9 fractions"{ 167 | "TIMESTAMP(9)".shouldBe(TimestampType.TimestampWithoutTimezone(zoneId,9),true) 168 | } 169 | "markedAsNullable"{ 170 | "NUMBER(10,0)".shouldBe(LongType, true) 171 | } 172 | "markesAsNonNullable"{ 173 | "NUMBER(10,0) not null".shouldBe(LongType, false) 174 | } 175 | "hasComment"{ 176 | "NUMBER(10,0)".shouldBe(LongType, true, null, "My Comment") 177 | } 178 | "varchar2"{ 179 | "VARCHAR2(5 CHAR)".shouldBe(StringType) 180 | } 181 | "char"{ 182 | "CHAR".shouldBe(StringType) 183 | } 184 | "nchar"{ 185 | "NCHAR".shouldBe(StringType) 186 | } 187 | 
"nvarchar2"{ 188 | "NVARCHAR2(5)".shouldBe(StringType) 189 | } 190 | "varchar2 default"{ 191 | "VARCHAR2(5 CHAR) default 'N'".shouldBe(StringType, true, "N") 192 | } 193 | "non parsable default"{ 194 | "NUMBER(5) default 1+2".shouldBe(IntType) 195 | } 196 | } 197 | } 198 | } -------------------------------------------------------------------------------- /src/main/kotlin/com/github/thake/logminer/kafka/connect/SourceTask.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect 2 | 3 | import com.github.thake.logminer.kafka.connect.initial.SelectSource 4 | import com.github.thake.logminer.kafka.connect.logminer.LogminerConfiguration 5 | import com.github.thake.logminer.kafka.connect.logminer.LogminerSource 6 | import mu.KotlinLogging 7 | import org.apache.kafka.connect.errors.ConnectException 8 | import org.apache.kafka.connect.errors.DataException 9 | import org.apache.kafka.connect.source.SourceRecord 10 | import org.apache.kafka.connect.source.SourceTask 11 | import org.apache.kafka.connect.source.SourceTaskContext 12 | import java.sql.Connection 13 | import java.sql.SQLException 14 | import java.util.* 15 | 16 | private val logger = KotlinLogging.logger {} 17 | class NoConnectionToDatabase : RuntimeException() 18 | sealed class TaskState 19 | object StoppedState : TaskState() 20 | data class StartedState(val config: SourceConnectorConfig, val context: SourceTaskContext) : TaskState() { 21 | private var currentConnection: Connection? = null 22 | 23 | val connection : Connection? 24 | get() { 25 | var connection = currentConnection 26 | if(connection != null && (connection.isClosed || !connection.isValid(1000))){ 27 | connection = null 28 | } 29 | if(connection == null){ 30 | connection = config.openConnection() 31 | currentConnection = connection 32 | } 33 | return connection 34 | } 35 | var offset: Offset? 36 | val nameService: ConnectNameService = SourceDatabaseNameService(config.dbName) 37 | private val schemaService: SchemaService by lazy { 38 | SchemaService(nameService,config.dbZoneId) 39 | } 40 | private var source: Source 41 | private val sourcePartition = Collections.singletonMap(TaskConstants.LOG_MINER_OFFSET, config.dbName) 42 | private val connectSchemaFactory = ConnectSchemaFactory(nameService, isEmittingTombstones = config.isTombstonesOnDelete) 43 | 44 | init { 45 | val workingConnection = connection ?: error("No connection to database possible at startup time. Aborting.") 46 | fun getTablesForOwner(owner: String): List { 47 | return workingConnection.metaData.getTables(null, owner, null, arrayOf("TABLE")).use { 48 | val result = mutableListOf() 49 | while (it.next()) { 50 | result.add(TableId(owner, it.getString(3))) 51 | } 52 | result 53 | } 54 | } 55 | 56 | fun getTablesToFetch(): List { 57 | return config.logMinerSelectors.flatMap { 58 | when (it) { 59 | is TableSelector -> Collections.singleton(TableId(it.owner, it.tableName)) 60 | is SchemaSelector -> getTablesForOwner(it.owner) 61 | } 62 | } 63 | } 64 | 65 | fun getInitialSource(offset: Offset?): Source { 66 | return when (offset) { 67 | is OracleLogOffset -> LogminerSource( 68 | LogminerConfiguration( 69 | config.logMinerSelectors, 70 | config.logminerDictionarySource, 71 | config.batchSize, 72 | config.dbFetchSize 73 | ), schemaService, offset 74 | ) 75 | null, is SelectOffset -> 76 | SelectSource(config.batchSize, getTablesToFetch(), schemaService, offset as? SelectOffset) 77 | } 78 | } 79 | 80 | fun createOffsetFromConfig(): Offset? 
{ 81 | return if (config.startScn > 0) { 82 | OracleLogOffset.create(config.startScn, config.startScn, false) 83 | } else { 84 | null 85 | } 86 | } 87 | 88 | val offsetMap = context.offsetStorageReader() 89 | .offset( 90 | sourcePartition 91 | ) ?: Collections.emptyMap() 92 | offset = Offset.create(offsetMap) ?: createOffsetFromConfig() 93 | source = getInitialSource(offset) 94 | } 95 | 96 | 97 | 98 | private fun createLogminerSource(): LogminerSource { 99 | val selectSource = source as? SelectSource 100 | return LogminerSource( 101 | LogminerConfiguration( 102 | config.logMinerSelectors, 103 | config.logminerDictionarySource, 104 | config.batchSize, 105 | config.dbFetchSize 106 | ), 107 | schemaService, 108 | selectSource?.getOffset()?.toOracleLogOffset() ?: OracleLogOffset.create(config.startScn, config.startScn, false) 109 | ) 110 | } 111 | 112 | 113 | fun poll(): List { 114 | logger.debug { "Polling database for new changes ..." } 115 | fun doPoll(): List { 116 | val workingConnection = connection ?: throw NoConnectionToDatabase() 117 | source.maybeStartQuery(workingConnection) 118 | val result = source.poll() 119 | //Advance the offset and source 120 | offset = source.getOffset() 121 | return result 122 | } 123 | 124 | var result = doPoll() 125 | if (source is SelectSource && result.isEmpty()) { 126 | val logminerSource = createLogminerSource() 127 | logger 128 | .info { "Initial import succeeded. Starting to read the archivelog from scn ${logminerSource.getOffset().commitScn}" } 129 | source = logminerSource 130 | result = doPoll() 131 | } 132 | //Convert the records to SourceRecords 133 | return result.flatMap { 134 | try { 135 | connectSchemaFactory.convertToSourceRecords(it, sourcePartition) 136 | } catch (e: DataException) { 137 | logger 138 | .warn(e) { "Couldn't convert record $it to schema. This most probably indicates that supplemental logging is not activated for all columns. This record will be skipped." } 139 | emptyList() 140 | } 141 | }.also { 142 | if (it.isEmpty()) { 143 | logger 144 | .debug { "No new changes found. Waiting ${config.pollInterval.toMillis()} ms until next poll attempt." } 145 | Thread.sleep(config.pollInterval.toMillis()) 146 | } else { 147 | logger.info { "Found ${it.size} new changes. Submitting them to kafka." 
} 148 | } 149 | } 150 | } 151 | 152 | fun stop() { 153 | logger.info { "Kafka connect oracle task will be stopped" } 154 | this.source.close() 155 | this.connection?.close() 156 | } 157 | 158 | } 159 | 160 | object TaskConstants { 161 | const val LOG_MINER_OFFSET = "logminer" 162 | } 163 | 164 | /** 165 | * @author Thorsten Hake (mail@thorsten-hake.com) 166 | */ 167 | class SourceTask : SourceTask() { 168 | private var state: TaskState = StoppedState 169 | 170 | override fun version() = "1.0" 171 | 172 | 173 | override fun start(map: Map) { 174 | state = StartedState(SourceConnectorConfig(map), context).apply { 175 | logger.info { "Oracle Kafka Connector is starting" } 176 | try { 177 | logger.debug { "Starting LogMiner Session" } 178 | this.connection 179 | logger.debug { "Logminer started successfully" } 180 | } catch (e: SQLException) { 181 | throw ConnectException("Error at database tier, Please check : $e") 182 | } 183 | } 184 | } 185 | 186 | @Throws(InterruptedException::class) 187 | override fun poll(): List { 188 | return try { 189 | val currState = state 190 | if (currState is StartedState) currState.poll() else throw IllegalStateException("Task has not been started") 191 | } catch (e: SQLException) { 192 | logger.info(e) { "SQLException thrown. This is most probably due to an error while stopping." } 193 | Collections.emptyList() 194 | } catch (e : NoConnectionToDatabase){ 195 | logger.info(e) {"Currently no connection to the database can be established. Returning 0 records to kafka."} 196 | Collections.emptyList() 197 | } 198 | } 199 | 200 | 201 | override fun stop() { 202 | logger.info { "Stop called for logminer" } 203 | (state as? StartedState)?.let { 204 | it.stop() 205 | logger.info { "Stopped logminer" } 206 | state = StoppedState 207 | } 208 | } 209 | } -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # 4 | # Copyright © 2015-2021 the original authors. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # https://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | ############################################################################## 20 | # 21 | # Gradle start up script for POSIX generated by Gradle. 22 | # 23 | # Important for running: 24 | # 25 | # (1) You need a POSIX-compliant shell to run this script. 
If your /bin/sh is 26 | # noncompliant, but you have some other compliant shell such as ksh or 27 | # bash, then to run this script, type that shell name before the whole 28 | # command line, like: 29 | # 30 | # ksh Gradle 31 | # 32 | # Busybox and similar reduced shells will NOT work, because this script 33 | # requires all of these POSIX shell features: 34 | # * functions; 35 | # * expansions «$var», «${var}», «${var:-default}», «${var+SET}», 36 | # «${var#prefix}», «${var%suffix}», and «$( cmd )»; 37 | # * compound commands having a testable exit status, especially «case»; 38 | # * various built-in commands including «command», «set», and «ulimit». 39 | # 40 | # Important for patching: 41 | # 42 | # (2) This script targets any POSIX shell, so it avoids extensions provided 43 | # by Bash, Ksh, etc; in particular arrays are avoided. 44 | # 45 | # The "traditional" practice of packing multiple parameters into a 46 | # space-separated string is a well documented source of bugs and security 47 | # problems, so this is (mostly) avoided, by progressively accumulating 48 | # options in "$@", and eventually passing that to Java. 49 | # 50 | # Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, 51 | # and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; 52 | # see the in-line comments for details. 53 | # 54 | # There are tweaks for specific operating systems such as AIX, CygWin, 55 | # Darwin, MinGW, and NonStop. 56 | # 57 | # (3) This script is generated from the Groovy template 58 | # https://github.com/gradle/gradle/blob/master/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt 59 | # within the Gradle project. 60 | # 61 | # You can find Gradle at https://github.com/gradle/gradle/. 62 | # 63 | ############################################################################## 64 | 65 | # Attempt to set APP_HOME 66 | 67 | # Resolve links: $0 may be a link 68 | app_path=$0 69 | 70 | # Need this for daisy-chained symlinks. 71 | while 72 | APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path 73 | [ -h "$app_path" ] 74 | do 75 | ls=$( ls -ld "$app_path" ) 76 | link=${ls#*' -> '} 77 | case $link in #( 78 | /*) app_path=$link ;; #( 79 | *) app_path=$APP_HOME$link ;; 80 | esac 81 | done 82 | 83 | APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit 84 | 85 | APP_NAME="Gradle" 86 | APP_BASE_NAME=${0##*/} 87 | 88 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 89 | DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' 90 | 91 | # Use the maximum available, or set MAX_FD != -1 to use that value. 92 | MAX_FD=maximum 93 | 94 | warn () { 95 | echo "$*" 96 | } >&2 97 | 98 | die () { 99 | echo 100 | echo "$*" 101 | echo 102 | exit 1 103 | } >&2 104 | 105 | # OS specific support (must be 'true' or 'false'). 106 | cygwin=false 107 | msys=false 108 | darwin=false 109 | nonstop=false 110 | case "$( uname )" in #( 111 | CYGWIN* ) cygwin=true ;; #( 112 | Darwin* ) darwin=true ;; #( 113 | MSYS* | MINGW* ) msys=true ;; #( 114 | NONSTOP* ) nonstop=true ;; 115 | esac 116 | 117 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 118 | 119 | 120 | # Determine the Java command to use to start the JVM. 121 | if [ -n "$JAVA_HOME" ] ; then 122 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 123 | # IBM's JDK on AIX uses strange locations for the executables 124 | JAVACMD=$JAVA_HOME/jre/sh/java 125 | else 126 | JAVACMD=$JAVA_HOME/bin/java 127 | fi 128 | if [ ! 
-x "$JAVACMD" ] ; then 129 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 130 | 131 | Please set the JAVA_HOME variable in your environment to match the 132 | location of your Java installation." 133 | fi 134 | else 135 | JAVACMD=java 136 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 137 | 138 | Please set the JAVA_HOME variable in your environment to match the 139 | location of your Java installation." 140 | fi 141 | 142 | # Increase the maximum file descriptors if we can. 143 | if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then 144 | case $MAX_FD in #( 145 | max*) 146 | MAX_FD=$( ulimit -H -n ) || 147 | warn "Could not query maximum file descriptor limit" 148 | esac 149 | case $MAX_FD in #( 150 | '' | soft) :;; #( 151 | *) 152 | ulimit -n "$MAX_FD" || 153 | warn "Could not set maximum file descriptor limit to $MAX_FD" 154 | esac 155 | fi 156 | 157 | # Collect all arguments for the java command, stacking in reverse order: 158 | # * args from the command line 159 | # * the main class name 160 | # * -classpath 161 | # * -D...appname settings 162 | # * --module-path (only if needed) 163 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. 164 | 165 | # For Cygwin or MSYS, switch paths to Windows format before running java 166 | if "$cygwin" || "$msys" ; then 167 | APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) 168 | CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) 169 | 170 | JAVACMD=$( cygpath --unix "$JAVACMD" ) 171 | 172 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 173 | for arg do 174 | if 175 | case $arg in #( 176 | -*) false ;; # don't mess with options #( 177 | /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath 178 | [ -e "$t" ] ;; #( 179 | *) false ;; 180 | esac 181 | then 182 | arg=$( cygpath --path --ignore --mixed "$arg" ) 183 | fi 184 | # Roll the args list around exactly as many times as the number of 185 | # args, so each arg winds up back in the position where it started, but 186 | # possibly modified. 187 | # 188 | # NB: a `for` loop captures its iteration list before it begins, so 189 | # changing the positional parameters here affects neither the number of 190 | # iterations, nor the values presented in `arg`. 191 | shift # remove old arg 192 | set -- "$@" "$arg" # push replacement arg 193 | done 194 | fi 195 | 196 | # Collect all arguments for the java command; 197 | # * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of 198 | # shell script including quotes and variable substitutions, so put them in 199 | # double quotes to make sure that they get re-expanded; and 200 | # * put everything else in single quotes, so that it's not re-expanded. 201 | 202 | set -- \ 203 | "-Dorg.gradle.appname=$APP_BASE_NAME" \ 204 | -classpath "$CLASSPATH" \ 205 | org.gradle.wrapper.GradleWrapperMain \ 206 | "$@" 207 | 208 | # Use "xargs" to parse quoted args. 209 | # 210 | # With -n1 it outputs one arg per line, with the quotes and backslashes removed. 
211 | # 212 | # In Bash we could simply go: 213 | # 214 | # readarray ARGS < <( xargs -n1 <<<"$var" ) && 215 | # set -- "${ARGS[@]}" "$@" 216 | # 217 | # but POSIX shell has neither arrays nor command substitution, so instead we 218 | # post-process each arg (as a line of input to sed) to backslash-escape any 219 | # character that might be a shell metacharacter, then use eval to reverse 220 | # that process (while maintaining the separation between arguments), and wrap 221 | # the whole thing up as a single "set" statement. 222 | # 223 | # This will of course break if any of these variables contains a newline or 224 | # an unmatched quote. 225 | # 226 | 227 | eval "set -- $( 228 | printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | 229 | xargs -n1 | 230 | sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | 231 | tr '\n' ' ' 232 | )" '"$@"' 233 | 234 | exec "$JAVACMD" "$@" 235 | -------------------------------------------------------------------------------- /src/main/kotlin/com/github/thake/logminer/kafka/connect/logminer/Transaction.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect.logminer 2 | 3 | import com.github.thake.logminer.kafka.connect.Operation 4 | import com.github.thake.logminer.kafka.connect.SchemaDefinition 5 | import com.github.thake.logminer.kafka.connect.SchemaService 6 | import com.github.thake.logminer.kafka.connect.TableId 7 | import mu.KotlinLogging 8 | import net.openhft.chronicle.queue.ChronicleQueue 9 | import net.openhft.chronicle.queue.ExcerptAppender 10 | import net.openhft.chronicle.queue.ExcerptTailer 11 | import java.nio.file.Files 12 | import java.sql.Connection 13 | import java.sql.Timestamp 14 | 15 | private val logger = KotlinLogging.logger {} 16 | 17 | class Transaction( 18 | private val queueFactory: ((xid: String) -> ChronicleQueue), 19 | private val conn: Connection, 20 | initialRecord: LogminerRow.Change, 21 | private val schemaService: SchemaService, 22 | private val maxRecordsInMemory: Int = 10 23 | ) { 24 | val xid: String = initialRecord.transaction 25 | val transactionSchemas: MutableMap = mutableMapOf() 26 | val alreadyRefreshedSchemas : MutableSet = mutableSetOf() 27 | var lastReadRowId = initialRecord.rowIdentifier.rowId 28 | var minScn = initialRecord.rowIdentifier.scn 29 | private set 30 | var commitScn: Long? 
= null 31 | private set 32 | var size: Long = 0 33 | private set 34 | private var storage: TransactionStorage = TransactionStorage.Memory() 35 | 36 | private enum class QueueState { WRITE, READ, CLOSED } 37 | 38 | private var state: QueueState = 39 | QueueState.WRITE 40 | private var stillToRead: Long = 0 41 | val hasMoreRecords 42 | get() = size > 0 && (state == QueueState.WRITE || stillToRead > 0) 43 | private var lastTimestamp: Long = System.currentTimeMillis() 44 | 45 | init { 46 | addChange(initialRecord) 47 | } 48 | 49 | fun commit(commit: LogminerRow.Commit) { 50 | this.state = QueueState.READ 51 | this.stillToRead = size 52 | this.commitScn = commit.rowIdentifier.scn 53 | } 54 | 55 | fun rollback() { 56 | this.close() 57 | } 58 | 59 | private fun ensureStorageCapacity() { 60 | val currentStorage = storage 61 | if (currentStorage is TransactionStorage.Memory && size >= maxRecordsInMemory) { 62 | val filesystemStorage = TransactionStorage.Filesystem(queueFactory.invoke(xid)) 63 | //Copy already stored entries to filesystemStorage 64 | (0 until size).forEach { 65 | val change = currentStorage.readChange() 66 | ?: throw IllegalStateException("Change record is missing in current storage engine. Trying to read entry #$it") 67 | filesystemStorage.addChange(change) 68 | } 69 | storage = filesystemStorage 70 | } 71 | } 72 | 73 | fun addChange(record: LogminerRow.Change) { 74 | if (state != QueueState.WRITE) { 75 | throw java.lang.IllegalStateException("No new record can be added to the queue. The queue has already been read.") 76 | } 77 | lastReadRowId = record.rowIdentifier.rowId 78 | //First retrieve the current schema if it is not already stored 79 | if (!transactionSchemas.containsKey(record.table)) { 80 | transactionSchemas[record.table] = schemaService.getSchema(conn, record.table) 81 | } 82 | ensureStorageCapacity() 83 | storage.addChange(record) 84 | size++ 85 | logPerformanceStatistics() 86 | } 87 | 88 | /** 89 | * Updates the schema if it may be outdated. A schema for a tableId will only be updated once per transaction. 90 | * 91 | * This method is thread safe and may be called by multiple threads. 92 | */ 93 | fun updateSchemaIfOutdated(tableId : TableId){ 94 | synchronized(this) { 95 | if(tableId !in alreadyRefreshedSchemas) { 96 | transactionSchemas[tableId] = schemaService.refreshSchema(conn, tableId) 97 | alreadyRefreshedSchemas.add(tableId) 98 | } 99 | } 100 | } 101 | 102 | private fun logPerformanceStatistics() { 103 | if (size > 1000 && size % 1000 == 0L) { 104 | logger.debug { 105 | val now = System.currentTimeMillis() 106 | val processingDuration = now - lastTimestamp 107 | val entriesPerSecond = 1000.0 / (processingDuration / 1000.0) 108 | lastTimestamp = now 109 | "Processing large transaction $xid. Currently loaded $size entries. Processed ${entriesPerSecond.format(2)} records per second." 110 | } 111 | } 112 | } 113 | 114 | private fun Double.format(digits: Int) = "%.${digits}f".format(this) 115 | 116 | fun readRecords(maxSize: Int): List { 117 | if (state != QueueState.READ) { 118 | throw java.lang.IllegalStateException("Transaction is not yet committed or already closed. 
Can't read in this state.") 119 | } 120 | 121 | var continueRead = true 122 | val loadedRecords = mutableListOf() 123 | while (loadedRecords.size < maxSize && continueRead) { 124 | val loadedRecord = storage.readChange() 125 | if (loadedRecord == null) { 126 | continueRead = false 127 | } else { 128 | loadedRecords.add(loadedRecord) 129 | stillToRead-- 130 | } 131 | } 132 | if (stillToRead <= 0L) { 133 | close() 134 | } 135 | 136 | return loadedRecords 137 | } 138 | 139 | fun close() { 140 | if (state != QueueState.CLOSED) { 141 | this.storage.cleanup() 142 | this.state = QueueState.CLOSED 143 | } 144 | } 145 | } 146 | 147 | private sealed class TransactionStorage { 148 | 149 | abstract fun addChange(change: LogminerRow.Change) 150 | abstract fun readChange(): LogminerRow.Change? 151 | abstract fun cleanup() 152 | class Memory : TransactionStorage() { 153 | private val backing = mutableListOf() 154 | private var currentIterater: Iterator? = null 155 | override fun addChange(change: LogminerRow.Change) { 156 | backing.add(change) 157 | } 158 | 159 | override fun readChange(): LogminerRow.Change? { 160 | var iterator = currentIterater 161 | if (iterator == null) { 162 | iterator = backing.iterator() 163 | currentIterater = iterator 164 | } 165 | return if (iterator.hasNext()) iterator.next() else null 166 | } 167 | 168 | override fun cleanup() { 169 | backing.clear() 170 | } 171 | } 172 | 173 | class Filesystem(private val queue: ChronicleQueue) : TransactionStorage() { 174 | 175 | private val appender: ExcerptAppender = queue.acquireAppender() 176 | private var trailer: ExcerptTailer? = null 177 | 178 | override fun addChange(change: LogminerRow.Change) { 179 | appender.writeDocument { 180 | with(it) { 181 | valueOut.int64(change.rowIdentifier.scn) 182 | valueOut.text(change.rowIdentifier.rowId) 183 | valueOut.text(change.sqlRedo) 184 | valueOut.text(change.transaction) 185 | valueOut.text(change.username) 186 | valueOut.text(change.operation.name) 187 | valueOut.text(change.table.owner) 188 | valueOut.text(change.table.table) 189 | valueOut.int64(change.timestamp.time) 190 | } 191 | } 192 | } 193 | 194 | override fun readChange(): LogminerRow.Change? { 195 | var trailer = this.trailer 196 | if (trailer == null) { 197 | trailer = queue.createTailer() 198 | this.trailer = trailer 199 | } 200 | var loadedDocument: LogminerRow.Change? = null 201 | trailer.readDocument { 202 | loadedDocument = 203 | with(it) { 204 | val rowIdentifier = LogminerRowIdentifier(valueIn.int64(), valueIn.text()!!) 205 | val sqlRedo = valueIn.text()!! 206 | val transaction = valueIn.text()!! 207 | val username = valueIn.text()!! 208 | val operation = Operation.valueOf(valueIn.text()!!) 209 | val table = TableId( 210 | valueIn.text()!!, 211 | valueIn.text()!! 
212 | ) 213 | val timestamp = Timestamp(valueIn.int64()) 214 | LogminerRow.Change( 215 | rowIdentifier = rowIdentifier, 216 | timestamp = timestamp, 217 | sqlRedo = sqlRedo, 218 | transaction = transaction, 219 | table = table, 220 | username = username, 221 | operation = operation 222 | ) 223 | } 224 | } 225 | return loadedDocument 226 | } 227 | 228 | override fun cleanup() { 229 | val queueDirToDelete = this.queue.file() 230 | this.queue.close() 231 | //Delete all files 232 | if (queueDirToDelete.exists()) { 233 | Files.walk(queueDirToDelete.toPath()).sorted( 234 | Comparator.reverseOrder() 235 | ) 236 | .forEach { Files.delete(it) } 237 | } 238 | } 239 | } 240 | } -------------------------------------------------------------------------------- /src/main/kotlin/com/github/thake/logminer/kafka/connect/SourceConnectorConfig.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect 2 | 3 | import mu.KotlinLogging 4 | import org.apache.kafka.common.config.AbstractConfig 5 | import org.apache.kafka.common.config.ConfigDef 6 | import org.apache.kafka.common.config.ConfigDef.Importance 7 | import java.sql.Connection 8 | import java.sql.DriverManager 9 | import java.sql.SQLException 10 | import java.time.Duration 11 | import java.time.ZoneId 12 | 13 | private val logger = KotlinLogging.logger {} 14 | 15 | sealed class LogMinerSelector 16 | data class TableSelector(val owner: String, val tableName: String) : LogMinerSelector() 17 | data class SchemaSelector(val owner: String) : LogMinerSelector() 18 | enum class LogminerDictionarySource { 19 | ONLINE, REDO_LOG 20 | } 21 | 22 | class SourceConnectorConfig( 23 | config: ConfigDef?, 24 | parsedConfig: Map 25 | ) : AbstractConfig(config, parsedConfig) { 26 | constructor(parsedConfig: Map) : this( 27 | conf(), 28 | parsedConfig 29 | ) 30 | 31 | fun openConnection() : Connection? { 32 | val dbUri = "${dbHostName}:${dbPort}/${dbSid}" 33 | fun doOpenConnection(): Connection { 34 | return DriverManager.getConnection( 35 | "jdbc:oracle:thin:@$dbUri", 36 | dbUser, dbPassword 37 | ).also { 38 | logger.info { "Connected to database at $dbUri" } 39 | } 40 | } 41 | 42 | var currentAttempt = 0 43 | var connection: Connection? = null 44 | while (currentAttempt < dbAttempts && connection == null) { 45 | if (currentAttempt > 0) { 46 | logger.info { "Waiting ${dbBackoff.toMillis()} ms before next attempt to acquire a connection" } 47 | Thread.sleep(dbBackoff.toMillis()) 48 | } 49 | currentAttempt++ 50 | try { 51 | connection = doOpenConnection() 52 | } catch (e: SQLException) { 53 | logger.error(e) { "Couldn't connect to database with url $dbUri. Attempt $currentAttempt." 
} 54 | 55 | } 56 | } 57 | return connection 58 | } 59 | 60 | 61 | val dbSid: String 62 | get() = getString(DB_SID) 63 | 64 | val dbHostName: String 65 | get() = getString(DB_HOST) 66 | 67 | val dbPort: Int 68 | get() = getInt(DB_PORT) 69 | 70 | val dbUser: String 71 | get() = getString(DB_USERNAME) 72 | 73 | val dbPassword: String 74 | get() = getString(DB_PASSWORD) 75 | 76 | val dbName: String 77 | get() = getString(DB_NAME) 78 | 79 | val dbZoneId: ZoneId 80 | get() = ZoneId.of(getString(DB_TIMEZONE)) 81 | 82 | 83 | val logminerDictionarySource: LogminerDictionarySource 84 | get() = LogminerDictionarySource.valueOf(getString(DB_LOGMINER_DICTIONARY)) 85 | 86 | val monitoredTables: List 87 | get() = getString(MONITORED_TABLES).split(",").map { it.trim() } 88 | 89 | val logMinerSelectors: List 90 | get() = monitoredTables.map { 91 | val parts = it.split(".") 92 | if (parts.size > 1) { 93 | TableSelector(parts[0], parts[1]) 94 | } else { 95 | SchemaSelector(parts[0]) 96 | } 97 | } 98 | 99 | val batchSize: Int 100 | get() = getInt(BATCH_SIZE) 101 | val dbFetchSize: Int 102 | get() = getInt(DB_FETCH_SIZE) ?: batchSize 103 | 104 | 105 | val startScn: Long 106 | get() = getLong(START_SCN) ?: 0 107 | 108 | 109 | val pollInterval: Duration 110 | get() = Duration.ofMillis(getLong(POLL_INTERVAL_MS)) 111 | 112 | val dbBackoff: Duration 113 | get() = Duration.ofMillis(getLong(DB_BACKOFF_MS)) 114 | 115 | val dbAttempts: Int 116 | get() = getInt(DB_ATTEMPTS) 117 | 118 | val isTombstonesOnDelete : Boolean 119 | get() = getBoolean(TOMBSTONES_ON_DELETE) 120 | 121 | companion object { 122 | const val DB_NAME = "db.name" 123 | const val DB_SID = "db.sid" 124 | const val DB_HOST = "db.hostname" 125 | const val DB_PORT = "db.port" 126 | const val DB_USERNAME = "db.user" 127 | const val DB_PASSWORD = "db.user.password" 128 | const val DB_ATTEMPTS = "db.attempts" 129 | const val DB_BACKOFF_MS = "db.backoff.ms" 130 | const val DB_LOGMINER_DICTIONARY = "db.logminer.dictionary" 131 | const val DB_TIMEZONE = "db.timezone" 132 | const val MONITORED_TABLES = "table.whitelist" 133 | const val DB_FETCH_SIZE = "db.fetch.size" 134 | const val START_SCN = "start.scn" 135 | const val BATCH_SIZE = "batch.size" 136 | const val POLL_INTERVAL_MS = "poll.interval.ms" 137 | const val TOMBSTONES_ON_DELETE = "tombstones.on.delete" 138 | 139 | fun conf(): ConfigDef { 140 | return ConfigDef() 141 | .define( 142 | DB_NAME, 143 | ConfigDef.Type.STRING, 144 | Importance.HIGH, 145 | "Logical name of the database. This name will be used as a prefix for the topic. You can choose this name as you like." 146 | ) 147 | .define( 148 | DB_SID, 149 | ConfigDef.Type.STRING, 150 | Importance.HIGH, 151 | "Database SID" 152 | ) 153 | .define( 154 | DB_HOST, 155 | ConfigDef.Type.STRING, 156 | Importance.HIGH, 157 | "Database hostname" 158 | ) 159 | .define( 160 | DB_PORT, 161 | ConfigDef.Type.INT, 162 | Importance.HIGH, 163 | "Database port (usually 1521)" 164 | ) 165 | .define( 166 | DB_USERNAME, 167 | ConfigDef.Type.STRING, 168 | Importance.HIGH, 169 | "Database user" 170 | ) 171 | .define( 172 | DB_PASSWORD, 173 | ConfigDef.Type.STRING, 174 | Importance.HIGH, 175 | "Database password" 176 | ) 177 | .define( 178 | DB_LOGMINER_DICTIONARY, 179 | ConfigDef.Type.STRING, 180 | LogminerDictionarySource.ONLINE.name, 181 | Importance.LOW, 182 | "Type of logminer dictionary that should be used. 
Valid values: " + LogminerDictionarySource.values() 183 | .joinToString { it.name } 184 | ) 185 | .define( 186 | DB_TIMEZONE, 187 | ConfigDef.Type.STRING, 188 | "UTC", 189 | Importance.HIGH, 190 | "The timezone in which TIMESTAMP columns (without any timezone information) should be interpreted as. Valid values are all values that can be passed to https://docs.oracle.com/javase/8/docs/api/java/time/ZoneId.html#of-java.lang.String-" 191 | ) 192 | .define( 193 | MONITORED_TABLES, 194 | ConfigDef.Type.STRING, 195 | "", 196 | Importance.HIGH, 197 | "Tables that should be monitored, separated by ','. Tables have to be specified with schema. Table names are case-sensitive (e.g. if your table name is an unquoted identifier, you'll need to specify it in all caps). You can also just " + 198 | "specify a schema to indicate that all tables within that schema should be monitored. Examples: 'MY_USER.TABLE, OTHER_SCHEMA'." 199 | ) 200 | .define( 201 | TOMBSTONES_ON_DELETE, 202 | ConfigDef.Type.BOOLEAN, 203 | true, 204 | Importance.HIGH, 205 | "If set to false, no tombstone records will be emitted after a delete operation." 206 | ) 207 | .define( 208 | BATCH_SIZE, 209 | ConfigDef.Type.INT, 210 | 1000, 211 | Importance.HIGH, 212 | "Batch size of rows that should be fetched in one batch" 213 | ) 214 | .define( 215 | DB_FETCH_SIZE, 216 | ConfigDef.Type.INT, 217 | null, 218 | Importance.MEDIUM, 219 | "JDBC result set prefetch size. If not set, it will be defaulted to batch.size. The fetch" + 220 | " should not be smaller than the batch size." 221 | ) 222 | .define( 223 | START_SCN, 224 | ConfigDef.Type.LONG, 225 | 0L, 226 | Importance.HIGH, 227 | "Start SCN, if set to 0 an initial intake from the tables will be performed." 228 | ) 229 | .define( 230 | DB_ATTEMPTS, 231 | ConfigDef.Type.INT, 232 | 3, 233 | Importance.LOW, 234 | "Maximum number of attempts to retrieve a valid JDBC connection." 235 | ) 236 | .define( 237 | DB_BACKOFF_MS, 238 | ConfigDef.Type.LONG, 239 | 10000L, 240 | Importance.LOW, 241 | "Backoff time in milliseconds between connection attempts." 242 | ) 243 | .define( 244 | POLL_INTERVAL_MS, 245 | ConfigDef.Type.LONG, 246 | 2000L, 247 | Importance.LOW, 248 | "Positive integer value that specifies the number of milliseconds the connector should wait after a polling attempt didn't retrieve any results." 
249 | ) 250 | } 251 | } 252 | } 253 | 254 | fun main() { 255 | println(SourceConnectorConfig.conf().toEnrichedRst()) 256 | } -------------------------------------------------------------------------------- /src/test/kotlin/com/github/thake/logminer/kafka/connect/SourceTaskTest.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect 2 | 3 | import io.kotest.matchers.collections.shouldBeEmpty 4 | import io.kotest.matchers.collections.shouldHaveSize 5 | import org.apache.kafka.connect.source.SourceRecord 6 | import org.apache.kafka.connect.source.SourceTaskContext 7 | import org.apache.kafka.connect.storage.OffsetStorageReader 8 | import org.junit.jupiter.api.AfterEach 9 | import org.junit.jupiter.api.Assertions.assertEquals 10 | import org.junit.jupiter.api.Assertions.assertTrue 11 | import org.junit.jupiter.api.BeforeEach 12 | import org.junit.jupiter.api.Test 13 | import org.slf4j.LoggerFactory 14 | import org.testcontainers.junit.jupiter.Testcontainers 15 | import java.sql.Connection 16 | import java.util.* 17 | 18 | @Testcontainers 19 | class SourceTaskTest : AbstractIntegrationTest() { 20 | private lateinit var sourceTask: SourceTask 21 | private lateinit var offsetManager: MockOffsetStorageReader 22 | private lateinit var defaultConfig: Map 23 | private val log = LoggerFactory.getLogger(SourceTaskTest::class.java) 24 | 25 | private class TestSourceTaskContext( 26 | val configs: Map, 27 | private val storageReader: OffsetStorageReader = MockOffsetStorageReader() 28 | ) : SourceTaskContext { 29 | 30 | override fun configs(): MutableMap { 31 | return this.configs.toMutableMap() 32 | } 33 | 34 | override fun offsetStorageReader(): OffsetStorageReader { 35 | return storageReader 36 | } 37 | 38 | } 39 | 40 | private class MockOffsetStorageReader : OffsetStorageReader { 41 | private var currentOffset = mutableMapOf() 42 | fun updateOffset(offset: MutableMap) { 43 | currentOffset = offset 44 | } 45 | 46 | override fun offsets(partitions: MutableCollection>?): MutableMap, MutableMap> { 47 | return Collections.emptyMap() 48 | } 49 | 50 | override fun offset(partition: MutableMap?): MutableMap { 51 | return currentOffset 52 | } 53 | 54 | } 55 | 56 | @BeforeEach 57 | fun setup() { 58 | defaultConfig = 59 | with(SourceConnectorConfig.Companion) { 60 | mapOf( 61 | BATCH_SIZE to "1000", 62 | DB_NAME to "test", 63 | DB_FETCH_SIZE to "10000", 64 | DB_SID to oracle.sid, 65 | DB_HOST to oracle.containerIpAddress, 66 | DB_PORT to oracle.oraclePort.toString(), 67 | DB_USERNAME to oracle.username, 68 | DB_PASSWORD to oracle.password, 69 | START_SCN to "0", 70 | MONITORED_TABLES to STANDARD_TABLE.fullName + ", " + SECOND_TABLE.fullName 71 | ) 72 | } 73 | sourceTask = SourceTask() 74 | offsetManager = MockOffsetStorageReader() 75 | sourceTask.initialize(TestSourceTaskContext(defaultConfig, offsetManager)) 76 | //Wait for tables to correctly initialize 77 | Thread.sleep(5000) 78 | } 79 | 80 | private fun createConfiguration(map: Map? 
= null): Map { 81 | return defaultConfig.toMutableMap().apply { map?.let { putAll(it) } } 82 | } 83 | 84 | @AfterEach 85 | fun tearDown() { 86 | sourceTask.stop() 87 | } 88 | 89 | @Test 90 | fun testNoScnConfigured() { 91 | sourceTask.start( 92 | createConfiguration( 93 | mapOf( 94 | SourceConnectorConfig.BATCH_SIZE to "10" 95 | ) 96 | ) 97 | ) 98 | val modifyingConnection = openConnection() 99 | //Initial state 100 | (0 until 100).forEach { modifyingConnection.insertRow(it) } 101 | val result = sourceTask.poll().toMutableList() 102 | assertTrue(result.isNotEmpty()) 103 | //Check that the batch size is correct 104 | assertEquals(10, result.size) 105 | //Now add new rows 106 | (100 until 200).forEach { modifyingConnection.insertRow(it) } 107 | //Now continue reading until poll returns an empty list 108 | result.addAll(sourceTask.readAllSourceRecords()) 109 | assertEquals(200, result.size) 110 | } 111 | 112 | private fun getCurrentScn(conn: Connection): Long { 113 | @Suppress("SqlResolve") 114 | return conn.prepareStatement("select CURRENT_SCN from V${'$'}DATABASE").use { stmt -> 115 | stmt.executeQuery().use { 116 | it.next() 117 | it.getLong(1) 118 | } 119 | } 120 | } 121 | 122 | @Test 123 | fun testInitialScnToCurrentLogSet() { 124 | val modifyingConnection = openConnection() 125 | //Initial state 126 | (0 until 100).forEach { modifyingConnection.insertRow(it) } 127 | val currentScn = getCurrentScn(modifyingConnection) 128 | sourceTask.start( 129 | createConfiguration( 130 | with(SourceConnectorConfig.Companion) { 131 | mapOf( 132 | BATCH_SIZE to "10", 133 | START_SCN to currentScn.toString() 134 | ) 135 | } 136 | ) 137 | ) 138 | val result = sourceTask.poll().toMutableList() 139 | assertTrue(result.isEmpty()) 140 | //Now add new rows 141 | (100 until 200).forEach { modifyingConnection.insertRow(it) } 142 | //Now continue reading until poll returns an empty list 143 | result.addAll(sourceTask.readAllSourceRecords()) 144 | assertEquals(100, result.size) 145 | result.forEach { record -> 146 | assertEquals(CDC_TYPE, record.sourceOffset()["type"]) 147 | } 148 | 149 | } 150 | 151 | @Test 152 | fun testInitialScnToOne() { 153 | val modifyingConnection = openConnection() 154 | //Initial state 155 | (0 until 100).forEach { modifyingConnection.insertRow(it) } 156 | val currentScn = 1L 157 | sourceTask.start( 158 | createConfiguration( 159 | with(SourceConnectorConfig.Companion) { 160 | mapOf( 161 | BATCH_SIZE to "10", 162 | START_SCN to currentScn.toString() 163 | ) 164 | } 165 | ) 166 | ) 167 | val result = sourceTask.readAllSourceRecords().toMutableList() 168 | assertEquals(100, result.size, "Result does not contain the same size as the number of inserted entries.") 169 | //Now add new rows 170 | (100 until 200).forEach { modifyingConnection.insertRow(it) } 171 | //Now continue reading until poll returns an empty list 172 | result.addAll(sourceTask.readAllSourceRecords()) 173 | assertEquals(200, result.size) 174 | result.forEach { record -> 175 | assertEquals(CDC_TYPE, record.sourceOffset()["type"]) 176 | } 177 | 178 | } 179 | 180 | @Test 181 | fun testRestartInInitialImport() { 182 | sourceTask.start( 183 | createConfiguration( 184 | mapOf( 185 | SourceConnectorConfig.BATCH_SIZE to "10" 186 | ) 187 | ) 188 | ) 189 | val modifyingConnection = openConnection() 190 | //Initial state 191 | (0 until 100).forEach { modifyingConnection.insertRow(it, SECOND_TABLE) } 192 | val result = sourceTask.poll().toMutableList() 193 | 194 | //Check that the batch size is correct 195 | assertEquals(10, 
result.size) 196 | //Now stop the source 197 | sourceTask.stop() 198 | offsetManager.updateOffset(result.last().sourceOffset().toMutableMap()) 199 | sourceTask.start( 200 | createConfiguration( 201 | mapOf( 202 | SourceConnectorConfig.BATCH_SIZE to "1000" 203 | ) 204 | ) 205 | ) 206 | //Now add new rows 207 | (100 until 200).forEach { modifyingConnection.insertRow(it) } 208 | //Now continue reading until poll returns an empty list 209 | result.addAll(sourceTask.readAllSourceRecords()) 210 | assertEquals(200, result.size) 211 | } 212 | @Test 213 | fun testResumeDuringCDCAfterDbConnectionLost() { 214 | sourceTask.start( 215 | createConfiguration( 216 | mapOf( 217 | SourceConnectorConfig.BATCH_SIZE to "10" 218 | ) 219 | ) 220 | ) 221 | val modifyingConnection = openConnection() 222 | //Initial state 223 | (0 until 10).forEach { modifyingConnection.insertRow(it, SECOND_TABLE) } 224 | val result = sourceTask.poll().toMutableList() 225 | 226 | //Check that the batch size is correct 227 | result.shouldHaveSize(10) 228 | 229 | //Now add new rows 230 | (100 until 200).forEach { modifyingConnection.insertRow(it) } 231 | //Fetch the next 10 rows. These should be the first cdc rows 232 | result.addAll(sourceTask.poll()) 233 | result.shouldHaveSize(20) 234 | 235 | log.info("Stopping oracle DB to simulate a lost connection") 236 | val stopResult = oracle.execInContainer("/bin/bash","-c","service oracle-xe stop") 237 | log.info("Stop exited with code ${stopResult.exitCode} and log output: ${stopResult.stdout} Err: ${stopResult.stderr}") 238 | //try to poll now. Should return in an empty result 239 | val expectedEmptyResult = sourceTask.poll() 240 | expectedEmptyResult.shouldBeEmpty() 241 | //Starting again 242 | val startResult = oracle.execInContainer("/bin/bash", "-c", "service oracle-xe start") 243 | log.info("Start exited with code ${startResult.exitCode} and log output: ${startResult.stdout} Err: ${startResult.stderr}") 244 | 245 | //Now continue reading until poll returns an empty list 246 | result.addAll(sourceTask.readAllSourceRecords()) 247 | assertEquals(110, result.size) 248 | } 249 | 250 | private fun SourceTask.readAllSourceRecords(): List { 251 | val result = mutableListOf() 252 | while (true) { 253 | val currentResult = poll() 254 | if (currentResult.isEmpty()) { 255 | break 256 | } else { 257 | result.addAll(currentResult) 258 | } 259 | } 260 | return result 261 | } 262 | } -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | # Logminer Kafka Connect 2 | 3 | Logminer Kafka Connect is a CDC Kafka Connect source for Oracle Databases (tested with Oracle 11.2.0.4). 4 | 5 | Changes are extracted from the Archivelog using [Oracle Logminer](https://docs.oracle.com/cd/B19306_01/server.102/b14215/logminer.htm). 
6 | 7 | - [Logminer Kafka Connect](#logminer-kafka-connect) 8 | * [Features](#features) 9 | * [Installation](#installation) 10 | + [Docker](#docker) 11 | + [Oracle Database Configuration Requirements](#oracle-database-configuration-requirements) 12 | * [Initial Import](#initial-import) 13 | * [Change Types](#change-types) 14 | + [Value Struct](#value-struct) 15 | + [Source struct](#source-struct) 16 | * [Configuration](#configuration) 17 | * [Limitations](#limitations) 18 | 19 | ## Features 20 | 21 | Stable features: 22 | - Insert, Update and Delete changes will be tracked on configured tables 23 | - Logminer without "CONTINUOUS_MINE", thus in theory being compatible with Oracle 19c (not tested) 24 | - Initial import of the current table state 25 | - Only based on Oracle features that are available in Oracle XE (and thus available in all Oracle versions). No 26 | Oracle GoldenGate license required! 27 | - Reading schema changes from the Online Dictionary. See 28 | https://docs.oracle.com/cd/B19306_01/server.102/b14215/logminer.htm#i1014687 for more information. 29 | 30 | Unstable features: 31 | - Reading schema changes from the Archive-Log 32 | 33 | Planned features: 34 | - More documentation :) 35 | 36 | ## Installation 37 | You can install this connector into Kafka Connect using the [Confluent Hub Client](https://docs.confluent.io/current/connect/managing/confluent-hub/client.html). Additionally, you need to download the Oracle 38 | JDBC driver and put it on the classpath of Logminer Kafka Connect. The JDBC driver can't be included in the Logminer Kafka Connect release 39 | as its license does not allow this. 40 | 41 | The following script will install Logminer Kafka Connect into an existing Kafka Connect installation: 42 | ```shell script 43 | wget https://github.com/thake/logminer-kafka-connect/releases/download/0.4.0/thake-logminer-kafka-connect-0.4.0.zip 44 | confluent-hub install ./thake-logminer-kafka-connect-0.4.0.zip --no-prompt 45 | rm ./thake-logminer-kafka-connect-0.4.0.zip 46 | wget https://repo1.maven.org/maven2/com/oracle/database/jdbc/ojdbc8/19.7.0.0/ojdbc8-19.7.0.0.jar -O /usr/share/confluent-hub-components/thake-logminer-kafka-connect/lib/ojdbc8-19.7.0.0.jar 47 | ``` 48 | ### Docker 49 | If you plan to run Logminer Kafka Connect as a container, you can also have a look at the Docker image at https://github.com/thake/logminer-kafka-connect-image 50 | 51 | ### Oracle Database Configuration Requirements 52 | In order for Logminer Kafka Connect to work, the database needs to be in ARCHIVELOG mode and Supplemental Logging needs to be 53 | enabled with all columns. Here are the commands that need to be executed in sqlplus: 54 | ```oraclesqlplus 55 | prompt Shutting down database to activate archivelog mode; 56 | shutdown immediate; 57 | startup mount; 58 | alter database archivelog; 59 | prompt Archive log activated.; 60 | alter database add supplemental log data (all) columns; 61 | prompt Activated supplemental logging with all columns.; 62 | prompt Starting up database; 63 | alter database open; 64 | ``` 65 | 66 | ## Initial Import 67 | If the start SCN is not set or set to 0, Logminer Kafka Connect will query 68 | the configured tables for an initial import. During the initial import, no 69 | DDL statements should be executed against the database. Otherwise, the initial import will 70 | fail. 71 | 72 | To support the initial import, database flashback queries need to be enabled on the source database.
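For example, the connector's database user needs the SELECT and FLASHBACK privileges on every monitored table (or the corresponding system-wide privileges). A minimal sketch, using the placeholder user CONNECT_USER and the placeholder table MY_SCHEMA.MY_TABLE; adapt the grants to your own security policy:
```oraclesqlplus
-- CONNECT_USER and MY_SCHEMA.MY_TABLE are placeholders.
GRANT SELECT, FLASHBACK ON MY_SCHEMA.MY_TABLE TO CONNECT_USER;
-- Alternatively, grant the broad system privilege instead of per-table grants:
GRANT FLASHBACK ANY TABLE TO CONNECT_USER;
```
Note that flashback queries also rely on the undo retention of the source database being large enough to cover the duration of the initial import.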
73 | 74 | All rows that are in the table at the time of the initial import will be treated as "INSERT" changes. 75 | 76 | ## Change Types 77 | The change types are compatible with the change types published by the Debezium (https://debezium.io/) project. 78 | Thus, it is easy to migrate to the official Debezium Oracle plugin once it reaches a stable state. 79 | The key of the Kafka topic will be filled with a struct containing the primary key values of the changed row. 80 | 81 | ### Value Struct 82 | The value is a structure having the following fields: 83 | - `op` 84 | Operation that has been executed. 85 | - Type: string 86 | - Possible values: 87 | - 'r' - Read on initial import 88 | - 'i' - Insert 89 | - 'u' - Update 90 | - 'd' - Delete 91 | - `before` 92 | Image of the row before the operation has been executed. Contains all columns. 93 | - Type: struct 94 | - Only filled for the following operations: 95 | - Update 96 | - Delete 97 | - `after` 98 | Image of the row after the operation has been executed. Contains all columns. 99 | - Type: struct 100 | - Only filled for the following operations: 101 | - Insert 102 | - Read 103 | - Update 104 | - `ts_ms` 105 | Timestamp of the import as milliseconds since epoch 106 | - Type: Long 107 | - `source` 108 | Additional information about this change record from the source database. 109 | - Type: source 110 | 111 | ### Source struct 112 | 113 | The following source fields will be provided: 114 | - `version` 115 | Version of this component 116 | - Type: string 117 | - Value: '1.0' 118 | - `connector` 119 | Name of this connector. 120 | - Type: string 121 | - Value: 'logminer-kafka-connect' 122 | - `ts_ms` 123 | Timestamp of the change in the source database. 124 | - Type: long 125 | - Logical Name: org.apache.kafka.connect.data.Timestamp 126 | - `scn` 127 | SCN number of the change. For the initial import, this is the SCN of the last update to the row. 128 | - Type: long 129 | - `txId` 130 | Transaction in which the change occurred in the source database. For the initial import, this field is always null. 131 | - Type: string (optional) 132 | - `table` 133 | Table in the source database for which the change was recorded. 134 | - Type: string 135 | - `schema` 136 | Schema in the source database in which the change was recorded. 137 | - Type: string 138 | - `user` 139 | The user that triggered the change 140 | - Type: string (optional) 141 | 142 | 143 | ## Configuration 144 | You can find an example configuration under [logminer-kafka-connect.properties](logminer-kafka-connect.properties). 145 | 146 | The following configuration parameters are available: 147 | - `db.hostname` 148 | Database hostname 149 | 150 | - Type: string 151 | - Importance: high 152 | 153 | - `db.name` 154 | Logical name of the database. This name will be used as a prefix for 155 | the topic. You can choose this name as you like. 156 | 157 | - Type: string 158 | - Importance: high 159 | 160 | - `db.port` 161 | Database port (usually 1521) 162 | 163 | - Type: int 164 | - Importance: high 165 | 166 | - `db.sid` 167 | Database SID 168 | 169 | - Type: string 170 | - Importance: high 171 | 172 | - `db.user` 173 | Database user 174 | 175 | - Type: string 176 | - Importance: high 177 | 178 | - `db.user.password` 179 | Database password 180 | 181 | - Type: string 182 | - Importance: high 183 | 184 | - `db.logminer.dictionary` 185 | Type of logminer dictionary that should be used.
186 | Valid values: ONLINE, REDO_LOG 187 | 188 | - Type: string 189 | - Default: ONLINE 190 | - Importance: low 191 | - `db.timezone` 192 | The timezone in which TIMESTAMP columns (without any timezone information) should be interpreted as. Valid values are all values that can be passed to https://docs.oracle.com/javase/8/docs/api/java/time/ZoneId.html#of-java.lang.String- 193 | 194 | - Type: string 195 | - Default: UTC 196 | - Importance: high 197 | 198 | - `batch.size` 199 | Batch size of rows that should be fetched in one batch 200 | 201 | - Type: int 202 | - Default: 1000 203 | - Importance: high 204 | 205 | - `start.scn` 206 | Start SCN, if set to 0 an initial intake from the tables will be 207 | performed. 208 | 209 | - Type: long 210 | - Default: 0 211 | - Importance: high 212 | 213 | - `table.whitelist` 214 | Tables that should be monitored, separated by ','. Tables have to be 215 | specified with schema. Table names are case-sensitive (e.g. if your table name is an unquoted identifier, you'll need to specify it in all caps). 216 | You can also just specify a schema to indicate 217 | that all tables within that schema should be monitored. Examples: 218 | 'MY\_USER.TABLE, OTHER\_SCHEMA'. 219 | 220 | - Type: string 221 | - Default: "" 222 | - Importance: high 223 | 224 | - `tombstones.on.delete` 225 | If set to false, no tombstone records will be emitted after a delete operation. 226 | 227 | - Type: boolean 228 | - Default: true 229 | - Importance: high 230 | 231 | - `db.fetch.size` 232 | JDBC result set prefetch size. If not set, it will be defaulted to 233 | batch.size. The fetch should not be smaller than the batch size. 234 | 235 | - Type: int 236 | - Default: null 237 | - Importance: medium 238 | 239 | - `db.attempts` 240 | Maximum number of attempts to retrieve a valid JDBC connection. 241 | 242 | - Type: int 243 | - Default: 3 244 | - Importance: low 245 | 246 | - `db.backoff.ms` 247 | Backoff time in milliseconds between connection attempts. 248 | 249 | - Type: long 250 | - Default: 10000 251 | - Importance: low 252 | 253 | - `poll.interval.ms` 254 | Positive integer value that specifies the number of milliseconds the 255 | connector should wait after a polling attempt didn't retrieve any 256 | results. 257 | 258 | - Type: long 259 | - Default: 2000 260 | - Importance: low 261 | 262 | ## Limitations 263 | - Due to limitations of Oracle Logminer, it is not possible to track the UPDATE statements to existing records that are implicitly performed whenever a new 264 | NOT NULL column with a default value is added to a table. 265 | 266 | However, you can change the way you add these columns in order to correctly record the UPDATE statements. 267 | Instead of doing everything in one command, you can separate it into the following steps: 268 | 1. Adding a new nullable column 269 | 1. Adding a trigger on insert that automatically inserts the default value for the new nullable column if it is not specified. 270 | 1. Updating the column with the default value for all existing rows 271 | 1. Changing the definition of the column to be NOT NULL with the default value. 272 | 1. Dropping the trigger on insert 273 | Performing the DDL in this way guarantees that the change log will be readable by Logminer.
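A minimal sketch of these steps for a hypothetical table MY_SCHEMA.MY_TABLE and a new column NEW_COL with the default value 'N' (all names and types are placeholders):
```oraclesqlplus
-- Step 1: add the column as nullable and without a default.
ALTER TABLE MY_SCHEMA.MY_TABLE ADD (NEW_COL VARCHAR2(1 CHAR));

-- Step 2: trigger that fills in the default value for new inserts.
CREATE OR REPLACE TRIGGER MY_SCHEMA.MY_TABLE_NEW_COL_TRG
  BEFORE INSERT ON MY_SCHEMA.MY_TABLE
  FOR EACH ROW
BEGIN
  IF :NEW.NEW_COL IS NULL THEN
    :NEW.NEW_COL := 'N';
  END IF;
END;
/

-- Step 3: backfill existing rows with ordinary UPDATE statements.
UPDATE MY_SCHEMA.MY_TABLE SET NEW_COL = 'N' WHERE NEW_COL IS NULL;
COMMIT;

-- Step 4: make the column NOT NULL with the default value.
ALTER TABLE MY_SCHEMA.MY_TABLE MODIFY (NEW_COL DEFAULT 'N' NOT NULL);

-- Step 5: drop the insert trigger again.
DROP TRIGGER MY_SCHEMA.MY_TABLE_NEW_COL_TRG;
```
Because the backfill in step 3 consists of ordinary UPDATE statements, Logminer records a change for every existing row, which is exactly what a single `ALTER TABLE ... ADD ... DEFAULT ... NOT NULL` statement would have hidden.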
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /src/main/kotlin/com/github/thake/logminer/kafka/connect/ColumnSchemaBuilder.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect 2 | 3 | import com.github.thake.logminer.kafka.connect.SchemaType.TimeType.TimestampType 4 | import mu.KotlinLogging 5 | import org.apache.kafka.connect.data.Date 6 | import org.apache.kafka.connect.data.Decimal 7 | import org.apache.kafka.connect.data.SchemaBuilder 8 | import org.apache.kafka.connect.data.Timestamp 9 | import java.math.BigDecimal 10 | import java.sql.ResultSet 11 | import java.time.* 12 | import java.time.format.DateTimeFormatter 13 | import java.util.* 14 | 15 | const val NUMERIC_TYPE_SCALE_LOW = -84 16 | //These values have been derived from tests against Oracle 12c 17 | //Decimal with scale 40 (max digits right of the dot) 18 | const val ORACLE_UNQUALIFIED_NUMBER_SCALE = 40 19 | const val ORACLE_UNQUALIFIED_NUMBER_PRECISION = ORACLE_UNQUALIFIED_NUMBER_SCALE+39 20 | 21 | const val DECIMAL_PRECISION_PROPERTY = "connect.decimal.precision" 22 | private val logger = KotlinLogging.logger {} 23 | val UNRESOLVABLE_DATE_TIME_EXPRESSIONS = arrayOf( 24 | "SYSDATE", 25 | "SYSTIMESTAMP", 26 | "CURRENT_TIMESTAMP", 27 | "CURRENT_DATE", 28 | "LOCALTIMESTAMP" 29 | ) 30 | sealed class SchemaType<T> { 31 | abstract fun createSchemaBuilder(): SchemaBuilder 32 | abstract fun convert(str: String): T 33 | abstract fun extract(index: Int, resultSet: ResultSet): T? 34 | open fun convertDefaultValue(str: String): T? = convert(str) 35 | sealed class NumberType<T> : SchemaType<T>() { 36 | override fun convertDefaultValue(str: String): T?
= convert(str.trim()) 37 | 38 | object ByteType : NumberType<Byte>() { 39 | override fun convert(str: String): Byte = str.toByte() 40 | override fun createSchemaBuilder(): SchemaBuilder = SchemaBuilder.int8() 41 | override fun toString(): String = "Byte" 42 | override fun extract(index: Int, resultSet: ResultSet): Byte = resultSet.getByte(index) 43 | } 44 | 45 | object ShortType : NumberType<Short>() { 46 | override fun convert(str: String) = str.toShort() 47 | override fun createSchemaBuilder(): SchemaBuilder = SchemaBuilder.int16() 48 | override fun toString(): String = "Short" 49 | override fun extract(index: Int, resultSet: ResultSet): Short = resultSet.getShort(index) 50 | } 51 | 52 | object IntType : NumberType<Int>() { 53 | override fun convert(str: String) = str.toInt() 54 | override fun createSchemaBuilder(): SchemaBuilder = SchemaBuilder.int32() 55 | override fun toString(): String = "Int" 56 | override fun extract(index: Int, resultSet: ResultSet): Int = resultSet.getInt(index) 57 | } 58 | 59 | object LongType : NumberType<Long>() { 60 | override fun convert(str: String) = str.toLong() 61 | override fun createSchemaBuilder(): SchemaBuilder = SchemaBuilder.int64() 62 | override fun toString(): String = "Long" 63 | override fun extract(index: Int, resultSet: ResultSet): Long = resultSet.getLong(index) 64 | } 65 | 66 | object FloatType : NumberType<Float>() { 67 | override fun convert(str: String) = str.toFloat() 68 | override fun createSchemaBuilder(): SchemaBuilder = SchemaBuilder.float32() 69 | override fun toString(): String = "Float" 70 | override fun extract(index: Int, resultSet: ResultSet): Float = resultSet.getFloat(index) 71 | } 72 | 73 | object DoubleType : NumberType<Double>() { 74 | override fun convert(str: String) = str.toDouble() 75 | override fun createSchemaBuilder(): SchemaBuilder = SchemaBuilder.float64() 76 | override fun toString(): String = "Double" 77 | override fun extract(index: Int, resultSet: ResultSet): Double = resultSet.getDouble(index) 78 | } 79 | 80 | data class BigDecimalType(val precision : Int, val scale: Int) : NumberType<BigDecimal>() { 81 | override fun convert(str: String): BigDecimal = str.toBigDecimal().setScale(scale) 82 | override fun createSchemaBuilder(): SchemaBuilder = Decimal.builder(scale).parameter(DECIMAL_PRECISION_PROPERTY,precision.toString()) 83 | override fun toString(): String = "BigDecimal" 84 | override fun extract(index: Int, resultSet: ResultSet): BigDecimal? = resultSet.getBigDecimal(index) 85 | ?.setScale(scale) 86 | } 87 | } 88 | 89 | object StringType : SchemaType<String>() { 90 | override fun convert(str: String) = str 91 | override fun convertDefaultValue(str: String) = str.trim().removeSurrounding("'") 92 | override fun createSchemaBuilder(): SchemaBuilder = SchemaBuilder.string() 93 | override fun toString(): String = "String" 94 | override fun extract(index: Int, resultSet: ResultSet): String? = resultSet.getString(index) 95 | } 96 | 97 | sealed class TimeType : SchemaType<java.util.Date>() { 98 | override fun convertDefaultValue(str: String): java.util.Date?
{ 99 | val asUpper = str.trim().toUpperCase() 100 | return if (UNRESOLVABLE_DATE_TIME_EXPRESSIONS.any { asUpper.contains(it) }) { 101 | null 102 | } else { 103 | convert(cleanDefaultStr(asUpper)) 104 | } 105 | } 106 | 107 | abstract fun cleanDefaultStr(str: String): String 108 | 109 | object DateType : TimeType() { 110 | val localDateFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd[ HH:mm:ss]") 111 | override fun convert(str: String): java.util.Date { 112 | return java.util.Date.from( 113 | LocalDate.parse( 114 | str, 115 | localDateFormatter 116 | ).atStartOfDay().toInstant(ZoneOffset.UTC) 117 | ) 118 | } 119 | 120 | override fun cleanDefaultStr(str: String) = str.removeSurrounding("DATE '", "'") 121 | override fun createSchemaBuilder(): SchemaBuilder = Date.builder() 122 | override fun toString(): String = "Date" 123 | //Stripping away hours, minutes and seconds 124 | override fun extract(index: Int, resultSet: ResultSet): java.util.Date? = 125 | resultSet.getDate(index)?.let { 126 | java.util.Date.from(it.toLocalDate().atStartOfDay(ZoneOffset.UTC).toInstant()) 127 | } 128 | } 129 | abstract class TimestampType(val fractionalSeconds: Int = 6) : TimeType(){ 130 | val fractionalSecondsPart : String 131 | get() = if(fractionalSeconds > 0) "[.${"S".repeat(fractionalSeconds)}]" else "" 132 | 133 | abstract val pattern : String 134 | val dateTimeFormatter : DateTimeFormatter by lazy { 135 | DateTimeFormatter.ofPattern(pattern) 136 | } 137 | override fun convert(str: String): java.util.Date = java.util.Date.from(parse(str)) 138 | protected open fun parse(str: String) : Instant = ZonedDateTime.parse(str, dateTimeFormatter).toInstant() 139 | override fun cleanDefaultStr(str: String) = str.removeSurrounding("TIMESTAMP '", "'") 140 | override fun createSchemaBuilder(): SchemaBuilder = Timestamp.builder() 141 | override fun extract(index: Int, resultSet: ResultSet): java.util.Date? = 142 | resultSet.getTimestamp(index)?.let { java.util.Date(it.time) } 143 | 144 | override fun equals(other: Any?): Boolean { 145 | if (this === other) return true 146 | if (other !is TimestampType) return false 147 | 148 | if (fractionalSeconds != other.fractionalSeconds) return false 149 | if (pattern != other.pattern) return false 150 | 151 | return true 152 | } 153 | override fun hashCode(): Int { 154 | var result = fractionalSeconds 155 | result = 31 * result + pattern.hashCode() 156 | return result 157 | } 158 | 159 | class TimestampWithoutTimezone(val defaultTimeZone : ZoneId, fractionalSeconds : Int = 6) : TimestampType(fractionalSeconds) { 160 | override val pattern: String 161 | get() = "yyyy-MM-dd HH:mm:ss$fractionalSecondsPart" 162 | private val cal = Calendar.getInstance(TimeZone.getTimeZone(defaultTimeZone)) 163 | override fun parse(str: String): Instant = LocalDateTime.parse(str, dateTimeFormatter).atZone(defaultTimeZone).toInstant() 164 | override fun extract(index: Int, resultSet: ResultSet): java.util.Date? 
= 165 | resultSet.getTimestamp(index,cal)?.let { java.util.Date(it.time) } 166 | override fun toString(): String = "Timestamp($fractionalSeconds) ($defaultTimeZone)" 167 | override fun equals(other: Any?): Boolean { 168 | if (this === other) return true 169 | if (other !is TimestampWithoutTimezone) return false 170 | if (!super.equals(other)) return false 171 | 172 | if (defaultTimeZone != other.defaultTimeZone) return false 173 | 174 | return true 175 | } 176 | override fun hashCode(): Int { 177 | var result = super.hashCode() 178 | result = 31 * result + defaultTimeZone.hashCode() 179 | return result 180 | } 181 | } 182 | class TimestampWithTimezone(fractionalSeconds: Int = 6) : TimestampType(fractionalSeconds) { 183 | //Format: 2020-01-27 06:00:00.640000 US/Pacific PDT 184 | override val pattern: String 185 | get() = "yyyy-MM-dd HH:mm:ss$fractionalSecondsPart VV [zzz]" 186 | 187 | override fun toString(): String = "Timestamp($fractionalSeconds) with timezone" 188 | } 189 | class TimestampWithLocalTimezone(fractionalSeconds: Int = 6) : TimestampType(fractionalSeconds){ 190 | //Format: 2020-09-24 10:11:26.684000+00:00 191 | override val pattern: String 192 | get() = "yyyy-MM-dd HH:mm:ss${fractionalSecondsPart}xxx" 193 | override fun toString(): String = "Timestamp with local timezone" 194 | } 195 | } 196 | 197 | } 198 | 199 | 200 | companion object { 201 | fun toSchemaType(columnDataType: ColumnDefinition, defaultZoneId: ZoneId): SchemaType<out Any> { 202 | val scale = columnDataType.scale 203 | val precision = columnDataType.precision 204 | return when (columnDataType.type) { 205 | "BINARY_FLOAT" -> NumberType.FloatType 206 | "BINARY_DOUBLE" -> NumberType.DoubleType 207 | "NUMBER" -> { 208 | when { 209 | scale == null -> { 210 | //Undefined NUMERIC 211 | logger.warn { "The '${columnDataType.name}' column's type is an unqualified NUMBER. This leads to really huge decimals. Please consider specifying the scale and precision of the column." } 212 | NumberType.BigDecimalType(ORACLE_UNQUALIFIED_NUMBER_PRECISION,ORACLE_UNQUALIFIED_NUMBER_SCALE) 213 | } 214 | precision < 19 -> { // fits in primitive data types.
215 | when { 216 | scale in NUMERIC_TYPE_SCALE_LOW..0 -> { // integer 217 | when { 218 | precision > 9 -> { 219 | NumberType.LongType 220 | } 221 | precision > 4 -> { 222 | NumberType.IntType 223 | } 224 | precision > 2 -> { 225 | NumberType.ShortType 226 | } 227 | else -> { 228 | NumberType.ByteType 229 | } 230 | } 231 | } 232 | precision > 0 -> NumberType.DoubleType 233 | else -> 234 | NumberType.BigDecimalType(precision,scale) 235 | 236 | } 237 | } 238 | else -> { 239 | NumberType.BigDecimalType(precision,scale) 240 | } 241 | } 242 | } 243 | "CHAR", "VARCHAR", "VARCHAR2", "NVARCHAR2", "CLOB", "NCLOB", "LONG", "NCHAR" -> StringType 244 | "DATE" -> TimeType.DateType 245 | else -> 246 | if (columnDataType.type.startsWith("TIMESTAMP")) { 247 | val fractionalSeconds = columnDataType.scale ?: 6 248 | when { 249 | columnDataType.type.endsWith("WITH TIME ZONE") -> TimestampType.TimestampWithTimezone(fractionalSeconds) 250 | columnDataType.type.endsWith("WITH LOCAL TIME ZONE") -> TimestampType.TimestampWithLocalTimezone(fractionalSeconds) 251 | else -> TimestampType.TimestampWithoutTimezone(defaultZoneId,fractionalSeconds) 252 | } 253 | } else { 254 | throw IllegalArgumentException("Type for column data type $columnDataType is currently not supported") 255 | } 256 | } 257 | } 258 | } 259 | } 260 | -------------------------------------------------------------------------------- /src/main/kotlin/com/github/thake/logminer/kafka/connect/logminer/LogminerSession.kt: -------------------------------------------------------------------------------- 1 | package com.github.thake.logminer.kafka.connect.logminer 2 | 3 | import com.github.thake.logminer.kafka.connect.LogminerDictionarySource 4 | import com.github.thake.logminer.kafka.connect.SchemaSelector 5 | import com.github.thake.logminer.kafka.connect.TableSelector 6 | import com.github.thake.logminer.kafka.connect.logminer.LogminerSchema.END_LOGMINER_SESSION_QUERY 7 | import mu.KotlinLogging 8 | import java.sql.Connection 9 | import java.sql.PreparedStatement 10 | import java.sql.ResultSet 11 | 12 | private val logger = KotlinLogging.logger {} 13 | /** 14 | * Describes the logminer schema and all SQL calls needed to fetch results from the logminer * 15 | */ 16 | object LogminerSchema { 17 | object Fields { 18 | const val SEG_OWNER = "SEG_OWNER" 19 | const val TABLE_NAME = "TABLE_NAME" 20 | const val TIMESTAMP = "TIMESTAMP" 21 | const val SQL_REDO = "SQL_REDO" 22 | const val OPERATION = "OPERATION" 23 | const val USERNAME = "USERNAME" 24 | const val XID = "xid" 25 | const val SCN = "SCN" 26 | const val ROW_ID = "ROW_ID" 27 | const val CSF = "CSF" 28 | const val STATUS = "STATUS" 29 | } 30 | 31 | const val TEMPORARY_TABLE = "temporary tables" 32 | const val NULL_VALUE = "NULL" 33 | 34 | const val QUERY_MONITORED_LOGS = """ 35 | select filename as name, low_scn, next_scn from V${'$'}LOGMNR_LOGS 36 | """ 37 | const val START_LOGMINER_SESSION_REDO_LOG_DICTIONARY_QUERY = """ 38 | declare 39 | start_scn NUMBER := ?; 40 | catalog_exists NUMBER := 0; 41 | type logfile_name_table is table of NUMBER INDEX BY V${'$'}LOGFILE.member%TYPE; 42 | v_logfiles logfile_name_table; 43 | i V${'$'}LOGFILE.member%TYPE; 44 | start_scn_in_logfiles NUMBER := 0; 45 | begin 46 | --First check if a catalog exists in log files. Add all logfiles afterwards so that DDL tracking works. 
47 | for l_cat_log_file in ( 48 | select name,FIRST_CHANGE# from v${'$'}archived_log where NAME IS NOT NULL AND FIRST_CHANGE# >= (select max(FIRST_CHANGE#) from V${'$'}ARCHIVED_LOG where DICTIONARY_BEGIN = 'YES') 49 | ) 50 | loop 51 | v_logfiles(l_cat_log_file.NAME) := l_cat_log_file.FIRST_CHANGE#; 52 | catalog_exists := 1; 53 | end loop; 54 | 55 | if(catalog_exists = 0) then 56 | -- Create a catalog 57 | DBMS_OUTPUT.PUT_LINE('Found no dictionary in redo logs. Building dictionary for logminer stored in redo logs.'); 58 | DBMS_LOGMNR_D.BUILD(OPTIONS=> DBMS_LOGMNR_D.STORE_IN_REDO_LOGS); 59 | else 60 | DBMS_OUTPUT.PUT_LINE('Found a dictionary in redo logs. Using it as a reference.'); 61 | end if; 62 | for l_log_rec IN (select min(name) name, FIRST_CHANGE#, NEXT_CHANGE# 63 | from ( 64 | select min(member) as name, FIRST_CHANGE#, NEXT_CHANGE# 65 | from V${'$'}LOG l 66 | inner join V${'$'}LOGFILE f on l.GROUP# = f.GROUP# 67 | group by first_change#, NEXT_CHANGE# 68 | union 69 | select name, FIRST_CHANGE#, NEXT_CHANGE# 70 | From V${'$'}ARCHIVED_LOG 71 | where name is not null) 72 | where FIRST_CHANGE# >= start_scn OR start_scn < NEXT_CHANGE# 73 | group by first_change#, next_change# 74 | order by FIRST_CHANGE#) 75 | loop 76 | v_logfiles(l_log_rec.name) := l_log_rec.FIRST_CHANGE#; 77 | end loop; 78 | i := v_logfiles.FIRST; 79 | WHILE i IS NOT NULL LOOP 80 | if i = v_logfiles.FIRST then 81 | DBMS_OUTPUT.PUT_LINE('Restarting logminer with logfile '||i); 82 | DBMS_LOGMNR.ADD_LOGFILE(i,DBMS_LOGMNR.NEW); 83 | else 84 | DBMS_OUTPUT.PUT_LINE('Adding logfile: '||i); 85 | DBMS_LOGMNR.ADD_LOGFILE(i); 86 | end if; 87 | if(v_logfiles(i) <= start_scn) then 88 | start_scn_in_logfiles := 1; 89 | end if; 90 | i := v_logfiles.NEXT(i); -- Get next element of array 91 | END LOOP; 92 | if(start_scn_in_logfiles = 0) then 93 | DBMS_OUTPUT.PUT_LINE('Start scn has not been found in available log files. 
Setting start scn to earliest available scn'); 94 | start_scn := 0; 95 | end if; 96 | DBMS_LOGMNR.START_LOGMNR(startScn => start_scn, 97 | OPTIONS => DBMS_LOGMNR.SKIP_CORRUPTION + DBMS_LOGMNR.NO_SQL_DELIMITER + DBMS_LOGMNR.NO_ROWID_IN_STMT + 98 | DBMS_LOGMNR.DICT_FROM_REDO_LOGS + DBMS_LOGMNR.DDL_DICT_TRACKING + 99 | dbms_logmnr.STRING_LITERALS_IN_STMT); 100 | end; 101 | """ 102 | const val START_OR_UPDATE_LOGMINER_SESSION_ONLINE_QUERY = """ 103 | declare 104 | st BOOLEAN := TRUE; 105 | start_scn NUMBER := ?; 106 | begin 107 | for l_log_rec IN (select min(name) name, FIRST_CHANGE#, NEXT_CHANGE# 108 | from ( 109 | select min(member) as name, FIRST_CHANGE#, NEXT_CHANGE# 110 | from V${'$'}LOG l 111 | inner join V${'$'}LOGFILE f on l.GROUP# = f.GROUP# 112 | group by first_change#, NEXT_CHANGE# 113 | union 114 | select name, FIRST_CHANGE#, NEXT_CHANGE# 115 | From V${'$'}ARCHIVED_LOG 116 | where name is not null) 117 | where FIRST_CHANGE# >= start_scn OR start_scn < NEXT_CHANGE# 118 | group by first_change#, next_change# 119 | order by FIRST_CHANGE#) 120 | loop 121 | if st then 122 | DBMS_LOGMNR.ADD_LOGFILE(l_log_rec.name,DBMS_LOGMNR.NEW); 123 | st := FALSE; 124 | else 125 | DBMS_LOGMNR.ADD_LOGFILE(l_log_rec.name); 126 | end if; 127 | end loop; 128 | DBMS_LOGMNR.START_LOGMNR( 129 | OPTIONS => DBMS_LOGMNR.SKIP_CORRUPTION + DBMS_LOGMNR.NO_SQL_DELIMITER + DBMS_LOGMNR.NO_ROWID_IN_STMT + 130 | DBMS_LOGMNR.DICT_FROM_ONLINE_CATALOG + 131 | dbms_logmnr.STRING_LITERALS_IN_STMT); 132 | end; 133 | """ 134 | const val UPDATE_MINER_QUERY = """ 135 | declare 136 | start_scn NUMBER := ?; 137 | file_added NUMBER := 0; 138 | begin 139 | for l_log_rec IN (select name,SEQUENCE#,FIRST_CHANGE#,NEXT_CHANGE# from (select min(name) name, SEQUENCE#, FIRST_CHANGE#, NEXT_CHANGE# 140 | from ( 141 | select min(member) as name, SEQUENCE#, FIRST_CHANGE#, NEXT_CHANGE# 142 | from V${'$'}LOG l 143 | inner join V${'$'}LOGFILE f on l.GROUP# = f.GROUP# 144 | group by SEQUENCE#,first_change#, NEXT_CHANGE# 145 | union 146 | select name,SEQUENCE#, FIRST_CHANGE#, NEXT_CHANGE# 147 | From V${'$'}ARCHIVED_LOG 148 | where name is not null) 149 | where FIRST_CHANGE# >= start_scn OR start_scn < NEXT_CHANGE# 150 | group by SEQUENCE#,first_change#, next_change# 151 | ) needed_logs where SEQUENCE# not in (select log_ID from V${'$'}LOGMNR_LOGS) order by FIRST_CHANGE#) 152 | loop 153 | DBMS_LOGMNR.ADD_LOGFILE(l_log_rec.name); 154 | DBMS_OUTPUT.PUT_LINE('Added logfile '||l_log_rec.name||' to logminer session'); 155 | file_added := 1; 156 | end loop; 157 | if(file_added > 0) then 158 | DBMS_LOGMNR.START_LOGMNR(startScn => start_scn, 159 | OPTIONS => DBMS_LOGMNR.SKIP_CORRUPTION + DBMS_LOGMNR.NO_SQL_DELIMITER + DBMS_LOGMNR.NO_ROWID_IN_STMT + 160 | DBMS_LOGMNR.DICT_FROM_REDO_LOGS + DBMS_LOGMNR.DDL_DICT_TRACKING + 161 | dbms_logmnr.STRING_LITERALS_IN_STMT); 162 | DBMS_OUTPUT.PUT_LINE('Restarted logminer with added logfiles loaded and start scn '||start_scn); 163 | else 164 | DBMS_OUTPUT.PUT_LINE('No new logfile has been added. Restart of logminer was not needed'); 165 | end if; 166 | end; 167 | """ 168 | 169 | const val QUERY_LOGMINER_START = 170 | """ 171 | SELECT 172 | scn, 173 | commit_scn, 174 | timestamp, 175 | operation, 176 | seg_owner, 177 | table_name, 178 | username, 179 | sql_redo, 180 | row_id, 181 | CSF, 182 | xid, 183 | status 184 | FROM v${'$'}logmnr_contents 185 | WHERE ROLLBACK = 0 186 | AND ( 187 | (scn >= ? AND OPERATION_CODE IN (7,36) AND USERNAME not in ('UNKNOWNX','KMINER')) 188 | OR 189 | (scn >= ? 
AND OPERATION_CODE in (1,2,3) and ( 190 | """ 191 | const val QUERY_LOGMINER_END = ")))" 192 | 193 | const val END_LOGMINER_SESSION_QUERY = """ 194 | begin 195 | DBMS_LOGMNR.END_LOGMNR(); 196 | end; 197 | """ 198 | } 199 | data class Log( 200 | val name : String, 201 | val scnRange : LongRange 202 | ) 203 | sealed class DictionaryStrategy{ 204 | fun initSession(conn: Connection, offset: FetcherOffset) { 205 | logger.info { "Checking if a session is already running" } 206 | val monitoredLogs = determineMonitoredLogs(conn) 207 | val fromScn = offset.lowestChangeScn 208 | if (monitoredLogs.isEmpty() || monitoredLogs.none { it.scnRange.contains(fromScn) }) { 209 | logger.info { "Starting oracle logminer with scn $fromScn" } 210 | initSession(conn, fromScn) 211 | } else { 212 | logger.info { "Logminer session already exists. Refreshing existing session." } 213 | updateSession(conn, fromScn) 214 | } 215 | logger.debug { 216 | val nowMonitoredLogs = determineMonitoredLogs(conn) 217 | "Monitoring ${nowMonitoredLogs.size} Logfiles" 218 | } 219 | } 220 | protected abstract fun initSession(conn : Connection, fromScn: Long) 221 | protected abstract fun updateSession(conn : Connection, fromScn: Long) 222 | protected fun determineMonitoredLogs(conn : Connection): Set<Log> { 223 | return conn.prepareStatement(LogminerSchema.QUERY_MONITORED_LOGS).use { stmt -> 224 | stmt.executeQuery().use { 225 | val monitoredLogs = mutableSetOf<Log>() 226 | while (it.next()) { 227 | monitoredLogs.add(Log(it.getString(1),LongRange(it.getLong(2),it.getLong(3)))) 228 | } 229 | monitoredLogs 230 | } 231 | } 232 | } 233 | 234 | object RedoLogStrategy : DictionaryStrategy() { 235 | 236 | override fun initSession(conn : Connection, fromScn : Long){ 237 | conn.prepareCall(LogminerSchema.START_LOGMINER_SESSION_REDO_LOG_DICTIONARY_QUERY).use { 238 | it.setLong(1, fromScn) 239 | it.execute() 240 | } 241 | } 242 | 243 | override fun updateSession(conn: Connection, fromScn: Long) { 244 | logger.info { "Updating oracle logminer session to include newest logfiles and to start from scn $fromScn" } 245 | conn.prepareCall(LogminerSchema.UPDATE_MINER_QUERY).use { 246 | it.setLong(1, fromScn) 247 | it.execute() 248 | } 249 | } 250 | } 251 | object OnlineLogStrategy : DictionaryStrategy(){ 252 | override fun initSession(conn: Connection, fromScn: Long) { 253 | conn.prepareCall(LogminerSchema.START_OR_UPDATE_LOGMINER_SESSION_ONLINE_QUERY).use{ 254 | it.setLong(1,fromScn) 255 | it.execute() 256 | } 257 | } 258 | 259 | override fun updateSession(conn: Connection, fromScn: Long) = initSession(conn,fromScn) 260 | 261 | 262 | } 263 | } 264 | class LogminerSession(private val conn : Connection, 265 | private val offset : FetcherOffset, 266 | private val config: LogminerConfiguration){ 267 | 268 | private val logMinerSelectSql: String by lazy { 269 | config.logMinerSelectors.joinToString( 270 | separator = " OR ", 271 | prefix = LogminerSchema.QUERY_LOGMINER_START, 272 | postfix = LogminerSchema.QUERY_LOGMINER_END 273 | ) { 274 | when (it) { 275 | is TableSelector -> "(${LogminerSchema.Fields.SEG_OWNER} ='${it.owner}' and ${LogminerSchema.Fields.TABLE_NAME} = '${it.tableName}')" 276 | is SchemaSelector -> "(${LogminerSchema.Fields.SEG_OWNER} ='${it.owner}')" 277 | } 278 | } 279 | } 280 | private val stmt: PreparedStatement = 281 | conn.prepareStatement( 282 | logMinerSelectSql, 283 | ResultSet.TYPE_FORWARD_ONLY, 284 | ResultSet.CONCUR_READ_ONLY 285 | ).apply { 286 | fetchSize = config.fetchSize 287 | logger.debug { "Querying oracle logminer with fetch size
$fetchSize" } 288 | setLong(1, offset.lowestCommitScn) 289 | setLong(2, offset.lowestChangeScn) 290 | } 291 | fun openResultSet(): ResultSet { 292 | logger.debug { "Select statement for oracle logminer(scn = ${offset.lastScn}): $logMinerSelectSql" } 293 | return stmt.executeQuery() 294 | } 295 | fun endSession(){ 296 | close() 297 | conn.prepareCall(END_LOGMINER_SESSION_QUERY).use { 298 | it.execute() 299 | } 300 | } 301 | fun close(){ 302 | if (!stmt.isClosed) stmt.close() 303 | } 304 | 305 | companion object { 306 | fun initSession(conn: Connection, offset: FetcherOffset, config: LogminerConfiguration) : LogminerSession{ 307 | val strategy = when(config.logminerDictionarySource){ 308 | LogminerDictionarySource.REDO_LOG -> DictionaryStrategy.RedoLogStrategy 309 | LogminerDictionarySource.ONLINE -> DictionaryStrategy.OnlineLogStrategy 310 | } 311 | strategy.initSession(conn,offset) 312 | return LogminerSession(conn,offset,config) 313 | } 314 | 315 | } 316 | } --------------------------------------------------------------------------------