├── src ├── CdcTools.CdcReader.Database │ ├── CdcTools.sql │ ├── Tables │ │ ├── TransactionState.sql │ │ ├── FullLoadState.sql │ │ └── ChangeState.sql │ └── CdcTools.CdcReader.Database.sqlproj ├── CdcTools.CdcToKafka.Streaming │ ├── dockerfile │ ├── RunMode.cs │ ├── SerializationMode.cs │ ├── Build.sh │ ├── TestBuild.sh │ ├── CdcState.cs │ ├── Producers │ │ ├── IKafkaProducer.cs │ │ ├── ProducerBase.cs │ │ ├── ProducerFactory.cs │ │ ├── NonKeyedJsonProducer.cs │ │ ├── KeyedJsonProducer.cs │ │ ├── NonKeyedAvroProducer.cs │ │ └── KeyedAvroProducer.cs │ ├── CdcRequest.cs │ ├── RowChange.cs │ ├── appsettings.json │ ├── CdcTools.CdcToKafka.Streaming.csproj │ ├── Serialization │ │ └── AvroTableTypeConverter.cs │ ├── FullLoadStreamer.cs │ └── Program.cs ├── CdcTools.CdcToRedshift │ ├── Docker │ │ ├── odbcinst.ini │ │ ├── env.sh │ │ ├── odbc.ini │ │ ├── install-redshift-drivers.sh │ │ └── amazon.redshiftodbc.ini │ ├── RunMode.cs │ ├── Build.sh │ ├── TestBuild.sh │ ├── NonTransactional │ │ └── CdcState.cs │ ├── dockerfile │ ├── appsettings.json │ ├── CdcTools.CdcToRedshift.csproj │ ├── Transactional │ │ └── TransactionExporter.cs │ └── FullLoadExporter.cs ├── CdcTools.KafkaToRedshift │ ├── Docker │ │ ├── odbcinst.ini │ │ ├── env.sh │ │ ├── odbc.ini │ │ ├── install-redshift-drivers.sh │ │ └── amazon.redshiftodbc.ini │ ├── SerializationMode.cs │ ├── Build.sh │ ├── TestBuild.sh │ ├── Consumers │ │ ├── KafkaSource.cs │ │ ├── IConsumer.cs │ │ ├── MessageProxy.cs │ │ ├── KeyedJsonConsumer.cs │ │ ├── NonKeyedJsonConsumer.cs │ │ ├── KeyedAvroConsumer.cs │ │ └── NonKeyedAvroConsumer.cs │ ├── dockerfile │ ├── Redshift │ │ ├── IRedshiftWriter.cs │ │ └── RedshiftWriter.cs │ ├── appsettings.json │ ├── CdcTools.KafkaToRedshift.csproj │ ├── Serialization │ │ └── AvroTableTypeConverter.cs │ └── Program.cs ├── CdcTools.CdcReader │ ├── Tables │ │ ├── PrimaryKeyColumn.cs │ │ ├── ITableSchemaRepository.cs │ │ ├── TablePrimaryKey.cs │ │ ├── IFullLoadRepository.cs │ │ ├── ICdcRepository.cs │ │ 
├── TableColumn.cs │ │ ├── TableSchema.cs │ │ ├── ColumnSchema.cs │ │ ├── TableSchemaQueryBuilder.cs │ │ ├── TableSchemaRepository.cs │ │ └── FullLoadRepository.cs │ ├── State │ │ ├── Offset.cs │ │ ├── StateResult.cs │ │ └── IStateManager.cs │ ├── Changes │ │ ├── ChangeType.cs │ │ ├── FullLoadRecord.cs │ │ ├── FullLoadBatch.cs │ │ ├── ChangeBatch.cs │ │ ├── PrimaryKeyValue.cs │ │ └── ChangeRecord.cs │ ├── CdcTools.CdcReader.csproj │ └── CdcReaderClient.cs ├── CdcTools.Redshift │ ├── Changes │ │ ├── ChangeType.cs │ │ └── RowChange.cs │ ├── CdcTools.Redshift.csproj │ ├── IRedshiftDao.cs │ ├── S3 │ │ ├── S3TableDocuments.cs │ │ ├── IS3Uploader.cs │ │ └── S3Uploader.cs │ ├── RedshiftConfiguration.cs │ └── RedshiftClient.cs ├── CdcTools.CdcReader.Transactional │ ├── State │ │ ├── IStateManager.cs │ │ ├── StateResult.cs │ │ └── StateManager.cs │ ├── ReaderException.cs │ ├── CdcTools.CdcReader.Transactional.csproj │ ├── TransactionBatch.cs │ ├── ITransactionCoordinator.cs │ ├── TransactionId.cs │ └── CdcTransactionClient.cs └── CdcTools.SqlToAvro │ ├── CdcTools.SqlToAvro.csproj │ └── AvroSchemaGenerator.cs ├── README.md ├── environment ├── CdcApps │ └── Docker │ │ ├── full-load-kafka │ │ ├── readme.txt │ │ └── docker-compose.yml │ │ ├── full-redshift │ │ ├── readme.txt │ │ └── docker-compose.yml │ │ ├── cdc-kafka-redshift │ │ ├── readme.txt │ │ └── docker-compose.yml │ │ ├── cdc-non-tran-redshift │ │ ├── readme.txt │ │ └── docker-compose.yml │ │ └── cdc-tran-redshift │ │ ├── readme.txt │ │ └── docker-compose.yml ├── RedshiftTables │ └── create-tables.sql ├── kafka-readme.txt ├── SourceTables │ ├── create-tables.sql │ └── modify-data.sql └── kafka │ └── docker-compose.yml ├── CdcTools.sln └── .gitignore /src/CdcTools.CdcReader.Database/CdcTools.sql: -------------------------------------------------------------------------------- 1 | CREATE SCHEMA [CdcTools] 2 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # CDC-Tools 2 | .NET Core applications and libraries for working with SQL Server CDC, with outputs to SQL Server, Kafka, Redshift 3 | -------------------------------------------------------------------------------- /src/CdcTools.CdcToKafka.Streaming/dockerfile: -------------------------------------------------------------------------------- 1 | FROM microsoft/dotnet:2.0-sdk 2 | ARG source 3 | WORKDIR /app 4 | COPY ${source:-obj/Docker/publish} . 5 | ENTRYPOINT ["dotnet", "CdcTools.CdcToKafka.Streaming.dll"] -------------------------------------------------------------------------------- /environment/CdcApps/Docker/full-load-kafka/readme.txt: -------------------------------------------------------------------------------- 1 | Run with the command: 2 | docker-compose run --rm FullLoadToKafka 3 | 4 | This will create the container and automatically remove once the full load is complete. -------------------------------------------------------------------------------- /environment/CdcApps/Docker/full-redshift/readme.txt: -------------------------------------------------------------------------------- 1 | Run with the command: 2 | docker-compose run --rm FullLoadToRedshift 3 | 4 | This will create the container and automatically remove once the full load is complete. 
-------------------------------------------------------------------------------- /src/CdcTools.CdcToRedshift/Docker/odbcinst.ini: -------------------------------------------------------------------------------- 1 | [ODBC Drivers] 2 | Amazon Redshift (x64)=Installed 3 | 4 | [Amazon Redshift (x64)] 5 | Description=Amazon Redshift ODBC Driver (64-bit) 6 | Driver=/opt/amazon/redshiftodbc/lib/64/libamazonredshiftodbc64.so 7 | 8 | -------------------------------------------------------------------------------- /src/CdcTools.KafkaToRedshift/Docker/odbcinst.ini: -------------------------------------------------------------------------------- 1 | [ODBC Drivers] 2 | Amazon Redshift (x64)=Installed 3 | 4 | [Amazon Redshift (x64)] 5 | Description=Amazon Redshift ODBC Driver (64-bit) 6 | Driver=/opt/amazon/redshiftodbc/lib/64/libamazonredshiftodbc64.so 7 | 8 | -------------------------------------------------------------------------------- /src/CdcTools.CdcToRedshift/Docker/env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib 4 | export ODBCINI=/etc/odbc.ini 5 | export AMAZONREDSHIFTODBCINI=/opt/amazon/redshiftodbc/lib/64/amazon.redshiftodbc.ini 6 | export ODBCSYSINI=/etc 7 | -------------------------------------------------------------------------------- /src/CdcTools.KafkaToRedshift/Docker/env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib 4 | export ODBCINI=/etc/odbc.ini 5 | export AMAZONREDSHIFTODBCINI=/opt/amazon/redshiftodbc/lib/64/amazon.redshiftodbc.ini 6 | export ODBCSYSINI=/etc 7 | -------------------------------------------------------------------------------- /environment/CdcApps/Docker/cdc-kafka-redshift/readme.txt: -------------------------------------------------------------------------------- 1 | This is an always running process. 
Run with the command: 2 | docker-compose up 3 | 4 | or run it in detached mode: 5 | docker-compose up -d 6 | 7 | When in detached mode, monitor the logs with 8 | docker-compose logs -f -t 9 | -------------------------------------------------------------------------------- /environment/CdcApps/Docker/cdc-non-tran-redshift/readme.txt: -------------------------------------------------------------------------------- 1 | This is an always running process. Run with the command: 2 | docker-compose up 3 | 4 | or run it in detached mode: 5 | docker-compose up -d 6 | 7 | When in detached mode, monitor the logs with 8 | docker-compose logs -f -t 9 | -------------------------------------------------------------------------------- /environment/CdcApps/Docker/cdc-tran-redshift/readme.txt: -------------------------------------------------------------------------------- 1 | This is an always running process. Run with the command: 2 | docker-compose up 3 | 4 | or run it in detached mode: 5 | docker-compose up -d 6 | 7 | When in detached mode, monitor the logs with 8 | docker-compose logs -f -t 9 | -------------------------------------------------------------------------------- /src/CdcTools.CdcToKafka.Streaming/RunMode.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Text; 4 | 5 | namespace CdcTools.CdcToKafka 6 | { 7 | public enum RunMode 8 | { 9 | NonTransactionalCdc, 10 | FullLoad 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /src/CdcTools.KafkaToRedshift/SerializationMode.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Text; 4 | 5 | namespace CdcTools.KafkaToRedshift 6 | { 7 | public enum SerializationMode 8 | { 9 | Json, 10 | Avro 11 | } 12 | } 13 | 
-------------------------------------------------------------------------------- /src/CdcTools.CdcToKafka.Streaming/SerializationMode.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Text; 4 | 5 | namespace CdcTools.CdcToKafka.Streaming 6 | { 7 | public enum SerializationMode 8 | { 9 | Json, 10 | Avro 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /src/CdcTools.CdcReader.Database/Tables/TransactionState.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE [CdcTools].[TransactionState]( 2 | [ExecutionId] [varchar](50) NOT NULL, 3 | [Lsn] [binary](10) NOT NULL, 4 | [LastUpdate] [datetime] NOT NULL, 5 | PRIMARY KEY CLUSTERED 6 | ( 7 | [ExecutionId] ASC 8 | ) 9 | ) 10 | -------------------------------------------------------------------------------- /src/CdcTools.CdcToRedshift/RunMode.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Text; 4 | 5 | namespace CdcTools.CdcToRedshift 6 | { 7 | public enum RunMode 8 | { 9 | NonTransactionalCdc, 10 | TransactionalCdc, 11 | FullLoad 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /src/CdcTools.CdcToRedshift/Build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -ev 4 | 5 | dotnet publish -c Release -o ./obj/Docker/publish 6 | docker build -t cdctools.cdc-to-redshift . 
7 | docker tag cdctools.cdc-to-redshift jackvanlightly/cdctools.cdc-to-redshift:latest 8 | docker push jackvanlightly/cdctools.cdc-to-redshift:latest -------------------------------------------------------------------------------- /src/CdcTools.CdcToRedshift/Docker/odbc.ini: -------------------------------------------------------------------------------- 1 | [ODBC Data Sources] 2 | Amazon_Redshift_x64=Amazon Redshift (x64) 3 | 4 | 5 | [Amazon Redshift (x64)] 6 | Driver=/opt/amazon/redshiftodbc/lib/64/libamazonredshiftodbc64.so 7 | Server=blahblah.redshift.amazonaws.com 8 | Port=5439 9 | Database=dev 10 | locale=en-US 11 | -------------------------------------------------------------------------------- /src/CdcTools.KafkaToRedshift/Build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ev 3 | 4 | dotnet publish -c Release -o ./obj/Docker/publish 5 | docker build -t cdctools.kafka-to-redshift . 6 | docker tag cdctools.kafka-to-redshift jackvanlightly/cdctools.kafka-to-redshift:latest 7 | docker push jackvanlightly/cdctools.kafka-to-redshift:latest -------------------------------------------------------------------------------- /src/CdcTools.KafkaToRedshift/Docker/odbc.ini: -------------------------------------------------------------------------------- 1 | [ODBC Data Sources] 2 | Amazon_Redshift_x64=Amazon Redshift (x64) 3 | 4 | 5 | [Amazon Redshift (x64)] 6 | Driver=/opt/amazon/redshiftodbc/lib/64/libamazonredshiftodbc64.so 7 | Server=blahblah.redshift.amazonaws.com 8 | Port=5439 9 | Database=dev 10 | locale=en-US 11 | -------------------------------------------------------------------------------- /src/CdcTools.CdcToRedshift/TestBuild.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -ev 4 | 5 | dotnet publish -c Release -o ./obj/Docker/publish 6 | docker build -t cdctools.cdc-to-redshift . 
7 | docker tag cdctools.cdc-to-redshift jackvanlightly/cdctools.cdc-to-redshift:latest-test 8 | docker push jackvanlightly/cdctools.cdc-to-redshift:latest-test -------------------------------------------------------------------------------- /src/CdcTools.KafkaToRedshift/TestBuild.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ev 3 | 4 | dotnet publish -c Release -o ./obj/Docker/publish 5 | docker build -t cdctools.kafka-to-redshift . 6 | docker tag cdctools.kafka-to-redshift jackvanlightly/cdctools.kafka-to-redshift:latest-test 7 | docker push jackvanlightly/cdctools.kafka-to-redshift:latest-test -------------------------------------------------------------------------------- /src/CdcTools.CdcToKafka.Streaming/Build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ev 3 | 4 | dotnet publish -c Release -o ./obj/Docker/publish 5 | docker build -t cdctools.cdc-to-kafka-streaming . 
6 | docker tag cdctools.cdc-to-kafka-streaming jackvanlightly/cdctools.cdc-to-kafka-streaming:latest 7 | docker push jackvanlightly/cdctools.cdc-to-kafka-streaming:latest -------------------------------------------------------------------------------- /src/CdcTools.KafkaToRedshift/Consumers/KafkaSource.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Text; 4 | 5 | namespace CdcTools.KafkaToRedshift.Consumers 6 | { 7 | public class KafkaSource 8 | { 9 | public string Table { get; set; } 10 | public string Topic { get; set; } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /src/CdcTools.CdcReader/Tables/PrimaryKeyColumn.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Text; 4 | 5 | namespace CdcTools.CdcReader.Tables 6 | { 7 | public class PrimaryKeyColumn 8 | { 9 | public string ColumnName { get; set; } 10 | public int OrdinalPosition { get; set; } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /src/CdcTools.CdcToKafka.Streaming/TestBuild.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ev 3 | 4 | dotnet publish -c Release -o ./obj/Docker/publish 5 | docker build -t cdctools.cdc-to-kafka-streaming . 
6 | docker tag cdctools.cdc-to-kafka-streaming jackvanlightly/cdctools.cdc-to-kafka-streaming:latest-test 7 | docker push jackvanlightly/cdctools.cdc-to-kafka-streaming:latest-test -------------------------------------------------------------------------------- /src/CdcTools.CdcReader.Database/Tables/FullLoadState.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE [CdcTools].[FullLoadState]( 2 | [ExecutionId] [varchar](50) NOT NULL, 3 | [TableName] [varchar](200) NOT NULL, 4 | [PrimaryKeyValue] [varchar](max) NULL, 5 | [LastUpdate] [datetime] NOT NULL, 6 | PRIMARY KEY CLUSTERED 7 | ( 8 | [ExecutionId] ASC, 9 | [TableName] ASC 10 | ) 11 | ) 12 | -------------------------------------------------------------------------------- /src/CdcTools.CdcReader/State/Offset.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Text; 4 | 5 | namespace CdcTools.CdcReader.State 6 | { 7 | public class Offset 8 | { 9 | public byte[] Lsn { get; set; } 10 | public byte[] SeqVal { get; set; } 11 | public bool UnfinishedLsn { get; set; } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /src/CdcTools.CdcReader/Tables/ITableSchemaRepository.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Text; 4 | using System.Threading.Tasks; 5 | 6 | namespace CdcTools.CdcReader.Tables 7 | { 8 | public interface ITableSchemaRepository 9 | { 10 | Task GetTableSchemaAsync(string tableName); 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /src/CdcTools.CdcToRedshift/NonTransactional/CdcState.cs: -------------------------------------------------------------------------------- 1 | namespace CdcTools.CdcToRedshift.NonTransactional 2 | { 3 | public 
class CdcState 4 | { 5 | public byte[] ToLsn { get; set; } 6 | public byte[] FromSeqVal { get; set; } 7 | public byte[] FromLsn { get; set; } 8 | public bool UnfinishedLsn { get; set; } 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /src/CdcTools.Redshift/Changes/ChangeType.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Text; 4 | 5 | namespace CdcTools.Redshift.Changes 6 | { 7 | public enum ChangeType 8 | { 9 | NOT_DEFINED = 0, 10 | DELETE = 1, 11 | INSERT = 2, 12 | UPDATE_BEFORE = 3, 13 | UPDATE_AFTER = 4 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /src/CdcTools.CdcReader/Changes/ChangeType.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Text; 4 | 5 | namespace CdcTools.CdcReader.Changes 6 | { 7 | public enum ChangeType 8 | { 9 | NOT_DEFINED = 0, 10 | DELETE = 1, 11 | INSERT = 2, 12 | UPDATE_BEFORE = 3, 13 | UPDATE_AFTER = 4 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /src/CdcTools.CdcReader.Database/Tables/ChangeState.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE [CdcTools].[ChangeState]( 2 | [ExecutionId] [varchar](50) NOT NULL, 3 | [TableName] [varchar](200) NOT NULL, 4 | [Lsn] [binary](10) NOT NULL, 5 | [SeqVal] [binary](10) NOT NULL, 6 | [LastUpdate] [datetime] NOT NULL, 7 | PRIMARY KEY CLUSTERED 8 | ( 9 | [ExecutionId] ASC, 10 | [TableName] ASC 11 | ) 12 | ) 13 | -------------------------------------------------------------------------------- /src/CdcTools.CdcReader/Tables/TablePrimaryKey.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using 
System.Collections.Generic; 3 | using System.Text; 4 | 5 | namespace CdcTools.CdcReader.Tables 6 | { 7 | internal class TablePrimaryKey 8 | { 9 | public string TableName { get; set; } 10 | public string ColumnName { get; set; } 11 | public int OrdinalPosition { get; set; } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /src/CdcTools.CdcReader.Transactional/State/IStateManager.cs: -------------------------------------------------------------------------------- 1 | using System.Threading.Tasks; 2 | 3 | namespace CdcTools.CdcReader.Transactional.State 4 | { 5 | public interface IStateManager 6 | { 7 | Task StoreTransactionIdAsync(string executionId, TransactionId transactionId); 8 | Task> GetLastTransactionIdAsync(string executionId); 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /src/CdcTools.CdcReader/CdcTools.CdcReader.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | netstandard2.0 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /src/CdcTools.Redshift/CdcTools.Redshift.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | netstandard2.0 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /src/CdcTools.CdcToKafka.Streaming/CdcState.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Text; 4 | 5 | namespace CdcTools.CdcToKafka.Streaming 6 | { 7 | public class CdcState 8 | { 9 | public byte[] ToLsn { get; set; } 10 | public byte[] FromSeqVal { get; set; } 11 | public byte[] FromLsn { get; set; } 12 | public bool UnfinishedLsn { get; set; } 13 | } 14 | } 15 | 
-------------------------------------------------------------------------------- /src/CdcTools.CdcToKafka.Streaming/Producers/IKafkaProducer.cs: -------------------------------------------------------------------------------- 1 | using CdcTools.CdcReader.Changes; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Text; 5 | using System.Threading; 6 | using System.Threading.Tasks; 7 | 8 | namespace CdcTools.CdcToKafka.Streaming.Producers 9 | { 10 | public interface IKafkaProducer : IDisposable 11 | { 12 | Task SendAsync(CancellationToken token, ChangeRecord changeRecord); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/CdcTools.Redshift/IRedshiftDao.cs: -------------------------------------------------------------------------------- 1 | using CdcTools.Redshift.S3; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Text; 5 | using System.Threading.Tasks; 6 | 7 | namespace CdcTools.Redshift 8 | { 9 | public interface IRedshiftDao 10 | { 11 | Task PerformCsvMergeAsync(List tableUpdates); 12 | Task LoadTableColumnsAsync(List tables); 13 | List GetOrderedColumns(string tableName); 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /src/CdcTools.KafkaToRedshift/Consumers/IConsumer.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Text; 4 | using System.Threading; 5 | using System.Threading.Tasks; 6 | 7 | namespace CdcTools.KafkaToRedshift.Consumers 8 | { 9 | public interface IConsumer 10 | { 11 | Task StartConsumingAsync(CancellationToken token, TimeSpan windowSizePeriod, int windowSizeItems, List kafkaSources); 12 | void WaitForCompletion(); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/CdcTools.CdcReader.Transactional/ReaderException.cs: 
-------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Text; 4 | 5 | namespace CdcTools.CdcReader.Transactional 6 | { 7 | public class ReaderException : Exception 8 | { 9 | public ReaderException(string message, Exception ex, byte[] currentLsn) 10 | : base(message, ex) 11 | { 12 | CurrentLsn = currentLsn; 13 | } 14 | 15 | public byte[] CurrentLsn { get; set; } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /src/CdcTools.Redshift/S3/S3TableDocuments.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Text; 4 | 5 | namespace CdcTools.Redshift.S3 6 | { 7 | public class S3TableDocuments 8 | { 9 | public string Lsn { get; set; } 10 | public string TableName { get; set; } 11 | public string UpsertPath { get; set; } 12 | public string DeletePath { get; set; } 13 | public int Part { get; set; } 14 | public int PartCount { get; set; } 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /src/CdcTools.SqlToAvro/CdcTools.SqlToAvro.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | netstandard2.0 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /src/CdcTools.CdcToKafka.Streaming/CdcRequest.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Text; 4 | 5 | namespace CdcTools.CdcToKafka.Streaming 6 | { 7 | public class CdcRequest 8 | { 9 | public string ExecutionId { get; set; } 10 | public TimeSpan Interval { get; set; } 11 | public List Tables { get; set; } 12 | public SerializationMode SerializationMode { get; set; } 13 | public 
int BatchSize { get; set; } 14 | public bool SendWithKey { get; set; } 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /environment/RedshiftTables/create-tables.sql: -------------------------------------------------------------------------------- 1 | create table "person" 2 | ( 3 | personid int not null distkey sortkey, 4 | firstname nvarchar(200) not null, 5 | surname nvarchar(200) not null, 6 | dateofbirth date not null, 7 | primary key(personid) 8 | ); 9 | 10 | create table personaddress 11 | ( 12 | addressid int not null, 13 | addressline1 nvarchar(1000) not null, 14 | city nvarchar(100) not null, 15 | postalcode nvarchar(20) not null, 16 | country nvarchar(100) not null, 17 | personid int not null distkey sortkey, 18 | primary key(addressid) 19 | ); -------------------------------------------------------------------------------- /src/CdcTools.CdcReader.Transactional/CdcTools.CdcReader.Transactional.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | netstandard2.0 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /src/CdcTools.CdcToRedshift/Docker/install-redshift-drivers.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Install the Redshift ODBC driver manager 4 | apt-get update \ 5 | && apt-get install -y --no-install-recommends unixodbc 6 | 7 | 8 | if ! wget https://s3.amazonaws.com/redshift-downloads/drivers/odbc/1.4.1.1001/AmazonRedshiftODBC-64-bit-1.4.1.1001-1.x86_64.deb; then 9 | echo 'Failed to download Redshift ODBC Driver!' 
1>&2 10 | exit 1 11 | fi 12 | 13 | # Install the Redshift ODBC driver 14 | apt install ./AmazonRedshiftODBC-64-bit-1.4.1.1001-1.x86_64.deb 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /src/CdcTools.CdcToRedshift/dockerfile: -------------------------------------------------------------------------------- 1 | FROM microsoft/dotnet:2.0-sdk 2 | 3 | ADD ./Docker/install-redshift-drivers.sh /tmp/install-redshift-drivers.sh 4 | ADD ./Docker/env.sh /tmp/env.sh 5 | 6 | RUN /tmp/install-redshift-drivers.sh 7 | 8 | ADD ./Docker/odbc.ini /etc/odbc.ini 9 | ADD ./Docker/odbcinst.ini /etc/odbcinst.ini 10 | ADD ./Docker/amazon.redshiftodbc.ini /opt/amazon/redshiftodbc/lib/64/amazon.redshiftodbc.ini 11 | 12 | RUN /tmp/env.sh 13 | 14 | ARG source 15 | WORKDIR /app 16 | COPY ${source:-obj/Docker/publish} . 17 | ENTRYPOINT ["dotnet", "CdcTools.CdcToRedshift.dll"] -------------------------------------------------------------------------------- /src/CdcTools.KafkaToRedshift/Docker/install-redshift-drivers.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Install the Redshift ODBC driver manager 4 | apt-get update \ 5 | && apt-get install -y --no-install-recommends unixodbc 6 | 7 | 8 | if ! wget https://s3.amazonaws.com/redshift-downloads/drivers/odbc/1.4.1.1001/AmazonRedshiftODBC-64-bit-1.4.1.1001-1.x86_64.deb; then 9 | echo 'Failed to download Redshift ODBC Driver!' 
1>&2 10 | exit 1 11 | fi 12 | 13 | # Install the Redshift ODBC driver 14 | apt install ./AmazonRedshiftODBC-64-bit-1.4.1.1001-1.x86_64.deb 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /src/CdcTools.CdcReader/Changes/FullLoadRecord.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Text; 4 | 5 | namespace CdcTools.CdcReader.Changes 6 | { 7 | public class FullLoadRecord 8 | { 9 | public FullLoadRecord() 10 | { 11 | Data = new Dictionary(); 12 | } 13 | 14 | public string TableName { get; set; } 15 | public string ChangeKey { get; set; } 16 | public Dictionary Data { get; set; } 17 | public int BatchSeqNo { get; set; } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/CdcTools.KafkaToRedshift/dockerfile: -------------------------------------------------------------------------------- 1 | FROM microsoft/dotnet:2.0-sdk 2 | 3 | ADD ./Docker/install-redshift-drivers.sh /tmp/install-redshift-drivers.sh 4 | ADD ./Docker/env.sh /tmp/env.sh 5 | 6 | RUN /tmp/install-redshift-drivers.sh 7 | 8 | ADD ./Docker/odbc.ini /etc/odbc.ini 9 | ADD ./Docker/odbcinst.ini /etc/odbcinst.ini 10 | ADD ./Docker/amazon.redshiftodbc.ini /opt/amazon/redshiftodbc/lib/64/amazon.redshiftodbc.ini 11 | 12 | RUN /tmp/env.sh 13 | 14 | ARG source 15 | WORKDIR /app 16 | COPY ${source:-obj/Docker/publish} . 
17 | ENTRYPOINT ["dotnet", "CdcTools.KafkaToRedshift.dll"] -------------------------------------------------------------------------------- /src/CdcTools.CdcReader/State/StateResult.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Text; 4 | 5 | namespace CdcTools.CdcReader.State 6 | { 7 | public enum Result 8 | { 9 | NoStoredState, 10 | StateReturned 11 | } 12 | 13 | public class StateResult 14 | { 15 | public StateResult(Result result, T state) 16 | { 17 | Result = result; 18 | State = state; 19 | } 20 | 21 | public Result Result { get; set; } 22 | public T State { get; set; } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/CdcTools.CdcReader/Tables/IFullLoadRepository.cs: -------------------------------------------------------------------------------- 1 | using CdcTools.CdcReader.Changes; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Text; 5 | using System.Threading.Tasks; 6 | 7 | namespace CdcTools.CdcReader.Tables 8 | { 9 | public interface IFullLoadRepository 10 | { 11 | Task GetRowCountAsync(TableSchema tableSchema); 12 | Task GetFirstBatchAsync(TableSchema tableSchema, int batchSize); 13 | Task GetBatchAsync(TableSchema tableSchema, PrimaryKeyValue lastRetrievedKey, int batchSize); 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /src/CdcTools.CdcReader.Transactional/TransactionBatch.cs: -------------------------------------------------------------------------------- 1 | using CdcTools.CdcReader.Changes; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Text; 5 | 6 | namespace CdcTools.CdcReader.Transactional 7 | { 8 | public class TransactionBatch 9 | { 10 | public TransactionBatch() 11 | { 12 | Changes = new List(); 13 | } 14 | 15 | public TransactionId Id { get; set; } 16 | public List Changes 
{ get; set; } 17 | public int Part { get; set; } 18 | public bool IsMultiPart { get; set; } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/CdcTools.CdcReader.Transactional/State/StateResult.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Text; 4 | 5 | namespace CdcTools.CdcReader.Transactional.State 6 | { 7 | public enum Result 8 | { 9 | NoStoredTransationId, 10 | TransactionIdReturned 11 | } 12 | 13 | public class StateResult 14 | { 15 | public StateResult(Result result, T state) 16 | { 17 | Result = result; 18 | State = state; 19 | } 20 | 21 | public Result Result { get; set; } 22 | public T State { get; set; } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/CdcTools.CdcReader/Changes/FullLoadBatch.cs: -------------------------------------------------------------------------------- 1 | using CdcTools.CdcReader.Tables; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Text; 5 | 6 | namespace CdcTools.CdcReader.Changes 7 | { 8 | public class FullLoadBatch 9 | { 10 | public FullLoadBatch() 11 | { 12 | Records = new List(); 13 | } 14 | 15 | public TableSchema TableSchema { get; set; } 16 | public PrimaryKeyValue FirstRowKey { get; set; } 17 | public PrimaryKeyValue LastRowKey { get; set; } 18 | public List Records { get; set; } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/CdcTools.CdcToKafka.Streaming/RowChange.cs: -------------------------------------------------------------------------------- 1 | using CdcTools.CdcReader.Changes; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Text; 5 | 6 | namespace CdcTools.CdcToKafka.Streaming 7 | { 8 | public class RowChange 9 | { 10 | public RowChange() 11 | { 12 | Data = new Dictionary(); 13 | } 14 
| 15 | public string Lsn { get; set; } 16 | public string SeqVal { get; set; } 17 | public string ChangeKey { get; set; } 18 | public ChangeType ChangeType { get; set; } 19 | public Dictionary Data { get; set; } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/CdcTools.CdcReader/Tables/ICdcRepository.cs: -------------------------------------------------------------------------------- 1 | using CdcTools.CdcReader.Changes; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Text; 5 | using System.Threading.Tasks; 6 | 7 | namespace CdcTools.CdcReader.Tables 8 | { 9 | public interface ICdcRepository 10 | { 11 | Task GetMinValidLsnAsync(string tableName); 12 | Task GetMaxLsnAsync(); 13 | Task GetChangeBatchAsync(TableSchema tableSchema, byte[] fromLsn, byte[] fromSeqVal, byte[] toLsn, int batchSize); 14 | Task GetChangeBatchAsync(TableSchema tableSchema, byte[] fromLsn, byte[] toLsn, int batchSize); 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /src/CdcTools.CdcReader/Tables/TableColumn.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Text; 4 | 5 | namespace CdcTools.CdcReader.Tables 6 | { 7 | internal class TableColumn 8 | { 9 | public string Schema { get; set; } 10 | public string TableName { get; set; } 11 | public int OrdinalPosition { get; set; } 12 | public string ColumnName { get; set; } 13 | public string DataType { get; set; } 14 | public int MaxCharsLength { get; set; } 15 | public bool IsNullable { get; set; } 16 | public int NumericScale { get; set; } 17 | public int NumericPrecision { get; set; } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/CdcTools.KafkaToRedshift/Redshift/IRedshiftWriter.cs: 
-------------------------------------------------------------------------------- 1 | using CdcTools.KafkaToRedshift.Consumers; 2 | using CdcTools.Redshift.Changes; 3 | using System; 4 | using System.Collections.Concurrent; 5 | using System.Collections.Generic; 6 | using System.Text; 7 | using System.Threading; 8 | using System.Threading.Tasks; 9 | 10 | namespace CdcTools.KafkaToRedshift.Redshift 11 | { 12 | public interface IRedshiftWriter 13 | { 14 | Task CacheTableColumnsAsync(List tables); 15 | Task StartWritingAsync(CancellationToken token, TimeSpan windowSizePeriod, int windowSizeItems, string table, BlockingCollection> accumulatedChanges); 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /src/CdcTools.KafkaToRedshift/appsettings.json: -------------------------------------------------------------------------------- 1 | { 2 | "AwsRegion": "eu-west-1", 3 | "RedshiftServer": "cdc-rs-cluster.csxqtb3sh3rz.eu-west-1.redshift.amazonaws.com", 4 | "RedshiftPort": "5439", 5 | "RedshiftDbName": "dev", 6 | "S3BucketName": "cdc-test-67dkfg27d", 7 | "WindowMs": 10000, 8 | "WindowItems": 5000, 9 | "TableTopicPrefix": "cdc-table-", 10 | "Tables": "Person,PersonAddress", 11 | "SerializationMode": "Avro", // Avro, Json 12 | "MessagesHaveKey": false, 13 | "KafkaBootstrapServers": "localhost:9092", 14 | "KafkaSchemaRegistryUrl": "http://localhost:8081", 15 | "StateManagmentConnection": "Server=(local);Database=CdcToRedshift;Trusted_connection=true;" 16 | } 17 | -------------------------------------------------------------------------------- /src/CdcTools.CdcReader/Changes/ChangeBatch.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Text; 4 | 5 | namespace CdcTools.CdcReader.Changes 6 | { 7 | public class ChangeBatch 8 | { 9 | public ChangeBatch() 10 | { 11 | Changes = new List(); 12 | } 13 | 14 | public List Changes { get; 
set; } 15 | public bool MoreChanges { get; set; } 16 | public bool MoreOfLastTransaction { get; set; } 17 | public byte[] FromLsn { get; set; } 18 | public byte[] FromSeqVal { get; set; } 19 | public byte[] ToLsn { get; set; } 20 | public byte[] ToSeqVal { get; set; } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/CdcTools.CdcToKafka.Streaming/appsettings.json: -------------------------------------------------------------------------------- 1 | { 2 | //"ExecutionId": "fc159b6f-1c10-4bcf-b744-b9c8c2fc5e3a", 3 | "IntervalMs": 1000, 4 | "TableTopicPrefix": "cdc-table-", 5 | "Tables": "Person,PersonAddress", 6 | "Mode": "full-load", // cdc-nontran, full-load 7 | "SerializationMode": "Json", // Avro, Json 8 | "SendWithKey": false, 9 | "PrintPercentProgressMod": 5, 10 | "BatchSize": 1000, 11 | "DatabaseConnection": "Server=(local);Database=CdcToRedshift;Trusted_connection=true;", 12 | "StateManagmentConnection": "Server=(local);Database=CdcToRedshift;Trusted_connection=true;", 13 | "KafkaBootstrapServers": "192.168.1.33:9092", 14 | "KafkaSchemaRegistryUrl": "http://192.168.1.33:8081" 15 | } 16 | -------------------------------------------------------------------------------- /src/CdcTools.CdcToRedshift/Docker/amazon.redshiftodbc.ini: -------------------------------------------------------------------------------- 1 | ## - Note that this default DriverManagerEncoding of UTF-32 is for iODBC. 2 | ## - unixODBC uses UTF-16 by default. 3 | ## - If unixODBC was compiled with -DSQL_WCHART_CONVERT, then UTF-32 is the correct value. 4 | ## Execute 'odbc_config --cflags' to determine if you need UTF-32 or UTF-16 on unixODBC 5 | ## - SimbaDM can be used with UTF-8 or UTF-16. 6 | ## The DriverUnicodeEncoding setting will cause SimbaDM to run in UTF-8 when set to 2 or UTF-16 when set to 1. 
7 | 8 | [Driver] 9 | DriverManagerEncoding=UTF-16 10 | ErrorMessagesPath=/opt/amazon/redshiftodbc/ErrorMessages 11 | LogPath=[LogPath] 12 | SwapFilePath=/tmp 13 | 14 | # unixODBC 15 | ODBCInstLib=libodbcinst.so.2 -------------------------------------------------------------------------------- /src/CdcTools.KafkaToRedshift/Docker/amazon.redshiftodbc.ini: -------------------------------------------------------------------------------- 1 | ## - Note that this default DriverManagerEncoding of UTF-32 is for iODBC. 2 | ## - unixODBC uses UTF-16 by default. 3 | ## - If unixODBC was compiled with -DSQL_WCHART_CONVERT, then UTF-32 is the correct value. 4 | ## Execute 'odbc_config --cflags' to determine if you need UTF-32 or UTF-16 on unixODBC 5 | ## - SimbaDM can be used with UTF-8 or UTF-16. 6 | ## The DriverUnicodeEncoding setting will cause SimbaDM to run in UTF-8 when set to 2 or UTF-16 when set to 1. 7 | 8 | [Driver] 9 | DriverManagerEncoding=UTF-16 10 | ErrorMessagesPath=/opt/amazon/redshiftodbc/ErrorMessages 11 | LogPath=[LogPath] 12 | SwapFilePath=/tmp 13 | 14 | # unixODBC 15 | ODBCInstLib=libodbcinst.so.2 -------------------------------------------------------------------------------- /src/CdcTools.CdcReader/State/IStateManager.cs: -------------------------------------------------------------------------------- 1 | using CdcTools.CdcReader.Changes; 2 | using CdcTools.CdcReader.Tables; 3 | using System; 4 | using System.Collections.Generic; 5 | using System.Text; 6 | using System.Threading.Tasks; 7 | 8 | namespace CdcTools.CdcReader.State 9 | { 10 | public interface IStateManager 11 | { 12 | Task StoreCdcOffsetAsync(string executionId, string tableName, Offset offset); 13 | Task> GetLastCdcOffsetAsync(string executionId, string tableName); 14 | Task StorePkOffsetAsync(string executionId, string tableName, PrimaryKeyValue pkValue); 15 | Task> GetLastPkOffsetAsync(string executionId, string tableName); 16 | } 17 | } 18 | 
-------------------------------------------------------------------------------- /src/CdcTools.Redshift/RedshiftConfiguration.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Text; 4 | 5 | namespace CdcTools.Redshift 6 | { 7 | public class RedshiftConfiguration 8 | { 9 | public string Region { get; set; } 10 | public string Server { get; set; } 11 | public string Port { get; set; } 12 | public string MasterUsername { get; set; } 13 | public string MasterUserPassword { get; set; } 14 | public string DBName { get; set; } 15 | public string IamRole { get; set; } 16 | public string S3BucketName { get; set; } 17 | public string AccessKey { get; set; } 18 | public string SecretAccessKey { get; set; } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/CdcTools.CdcToRedshift/appsettings.json: -------------------------------------------------------------------------------- 1 | { 2 | "ExecutionId": "12345", 3 | "IntervalMs": 10000, 4 | "Tables": "Person,PersonAddress", 5 | "Mode": "cdc-tran", // cdc-nontran, cdc-tran, full-load 6 | "PrintPercentProgressMod": 10, 7 | "NonTransactionalTableBatchSize": 1000, 8 | "PerTableBufferLimit": 1000, 9 | "TransactionBufferLimit": 100, 10 | "TransactionBatchSizeLimit": 100000, 11 | "DatabaseConnection": "Server=(local);Database=CdcToRedshift;Trusted_connection=true;", 12 | "StateManagmentConnection": "Server=(local);Database=CdcToRedshift;Trusted_connection=true;", 13 | "AwsRegion": "eu-west-1", 14 | "RedshiftServer": "cdc-rs-cluster.csxqtb3sh3rz.eu-west-1.redshift.amazonaws.com", 15 | "RedshiftPort": "5439", 16 | "RedshiftDbName": "dev", 17 | "S3BucketName": "cdc-test-67dkfg27d" 18 | } 19 | -------------------------------------------------------------------------------- /src/CdcTools.Redshift/S3/IS3Uploader.cs: 
-------------------------------------------------------------------------------- 1 | using Amazon.S3; 2 | using CdcTools.Redshift.Changes; 3 | using System; 4 | using System.Collections.Generic; 5 | using System.Text; 6 | using System.Threading.Tasks; 7 | 8 | namespace CdcTools.Redshift.S3 9 | { 10 | public interface IS3Uploader 11 | { 12 | Task PutS3UpsertAsync(AmazonS3Client s3Client, string table, List changeRecords, List orderedCols); 13 | Task PutS3DeleteAsync(AmazonS3Client s3Client, string table, List changeRecords, List orderedCols); 14 | Task PutS3UpsertPartAsync(AmazonS3Client s3Client, string table, List changeRecords, List orderedCols, int part); 15 | Task PutS3DeletePartAsync(AmazonS3Client s3Client, string table, List changeRecords, List orderedCols, int part); 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /src/CdcTools.CdcToKafka.Streaming/Producers/ProducerBase.cs: -------------------------------------------------------------------------------- 1 | using CdcTools.CdcReader.Changes; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Text; 5 | 6 | namespace CdcTools.CdcToKafka.Streaming.Producers 7 | { 8 | public class ProducerBase 9 | { 10 | protected Dictionary _config; 11 | protected string _topic; 12 | 13 | public ProducerBase(string topic) 14 | { 15 | _topic = topic; 16 | } 17 | 18 | public RowChange Convert(ChangeRecord changeRecord) 19 | { 20 | return new RowChange() 21 | { 22 | ChangeType = changeRecord.ChangeType, 23 | Data = changeRecord.Data, 24 | ChangeKey = changeRecord.ChangeKey, 25 | Lsn = changeRecord.LsnStr, 26 | SeqVal = changeRecord.SeqValStr 27 | }; 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /environment/kafka-readme.txt: -------------------------------------------------------------------------------- 1 | You can use the example docker-compose.yml in the /kafka folder to stand up a Kafka 
server, schema registry endpoint, REST proxy and Kafka Manager Web UI. 2 | 3 | Note that in order for the CDC apps to be able to see Kafka, you must create a Docker network that will allow them to communicate. This network must be created before starting Kafka and any app. 4 | 5 | Command to create a pre-exising network: 6 | docker network create kafka-shared-net 7 | 8 | If you have a Kafka cluster and schema registry already, then you can remove the references to this network in the various docker-compose.yml files. 9 | 10 | This was all developed and tested on Windows 10 1803. In order to create connectivity to the Kafka service on Windows, you must add "kafkaserver" and "schema-registry" to your hosts file. 11 | 12 | For example, I have: 13 | 192.168.1.33 kafkaserver 14 | 192.168.1.33 schema-registry 15 | 16 | 192.168.1.33 is my local IPv4 address. 17 | 18 | -------------------------------------------------------------------------------- /src/CdcTools.KafkaToRedshift/CdcTools.KafkaToRedshift.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | netcoreapp2.0 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /src/CdcTools.CdcToRedshift/CdcTools.CdcToRedshift.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | netcoreapp2.0 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /src/CdcTools.CdcReader.Transactional/ITransactionCoordinator.cs: -------------------------------------------------------------------------------- 1 | using CdcTools.CdcReader.Changes; 2 | using CdcTools.CdcReader.Tables; 3 | using System.Collections.Concurrent; 4 | using System.Collections.Generic; 5 | using System.Text; 6 | using System.Threading; 7 | 
using System.Threading.Tasks; 8 | 9 | namespace CdcTools.CdcReader.Transactional 10 | { 11 | public interface ITransactionCoordinator 12 | { 13 | Dictionary> StartTableReaders(CancellationToken token, 14 | List tableSchemas, 15 | int batchSize, 16 | byte[] lastRetrievedLsn); 17 | 18 | void StartGroupingTransactions(CancellationToken token, 19 | List tableSchemas, 20 | Dictionary> tableChangeBuffers, 21 | BlockingCollection transactionBatchBuffer, 22 | int transactionBatchSizeLimit); 23 | 24 | bool IsCompleted(); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/CdcTools.CdcToKafka.Streaming/CdcTools.CdcToKafka.Streaming.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | netcoreapp2.0 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /environment/CdcApps/Docker/full-load-kafka/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | 3 | services: 4 | FullLoadToKafka: 5 | image: "jackvanlightly/cdctools.cdc-to-kafka-streaming:latest" 6 | container_name: cdctools.cdc-to-kafka-streaming 7 | environment: 8 | CDCTOOLS_IntervalMs: 5000 9 | CDCTOOLS_TableTopicPrefix: cdc-table- 10 | CDCTOOLS_Tables: Person,PersonAddress 11 | CDCTOOLS_Mode: full-load 12 | CDCTOOLS_SerializationMode: Json 13 | CDCTOOLS_SendWithKey: 'false' 14 | CDCTOOLS_PrintPercentProgressMod: 5 15 | CDCTOOLS_NonTransactionalTableBatchSize: 1000 16 | CDCTOOLS_DatabaseConnection: Server=192.168.1.33,1433;Database=CdcToRedshift;User Id=cdc-user;Password=cdc12345; 17 | CDCTOOLS_StateManagmentConnection: Server=192.168.1.33,1433;Database=CdcToRedshift;User Id=cdc-user;Password=cdc12345; 18 | CDCTOOLS_KafkaBootstrapServers: kafkaserver:9092 19 | CDCTOOLS_KafkaSchemaRegistryUrl: http://schema-registry:8081 20 | logging: 
21 | options: 22 | max-size: 50m 23 | networks: 24 | default: 25 | external: 26 | name: kafka-shared-net -------------------------------------------------------------------------------- /environment/SourceTables/create-tables.sql: -------------------------------------------------------------------------------- 1 | CREATE DATABASE [CdcToRedshift] 2 | GO 3 | 4 | USE [CdcToRedshift] 5 | GO 6 | 7 | CREATE TABLE [dbo].[Person]( 8 | [PersonId] [int] NOT NULL, 9 | [FirstName] [varchar](50) NOT NULL, 10 | [Surname] [varchar](50) NOT NULL, 11 | [DateOfBirth] [datetime] NOT NULL, 12 | PRIMARY KEY CLUSTERED([PersonId] ASC) 13 | ) 14 | GO 15 | 16 | CREATE TABLE [dbo].[PersonAddress]( 17 | [AddressId] [int] NOT NULL, 18 | [Addressline1] [nvarchar](1000) NOT NULL, 19 | [City] [nvarchar](100) NOT NULL, 20 | [Postalcode] [nvarchar](20) NOT NULL, 21 | [Country] [nvarchar](100) NOT NULL, 22 | [PersonId] [int] NOT NULL, 23 | PRIMARY KEY CLUSTERED([AddressId] ASC) 24 | ) 25 | GO 26 | 27 | -- enable CDC on this database 28 | sys.sp_cdc_enable_db 29 | 30 | -- enable CDC on the Person table 31 | EXEC sys.sp_cdc_enable_table 32 | @source_schema = N'dbo', 33 | @source_name = N'Person', 34 | @role_name = NULL, 35 | @supports_net_changes = 1 36 | 37 | -- enable CDC on the PersonAddress table 38 | EXEC sys.sp_cdc_enable_table 39 | @source_schema = N'dbo', 40 | @source_name = N'PersonAddress', 41 | @role_name = NULL, 42 | @supports_net_changes = 1 43 | GO -------------------------------------------------------------------------------- /src/CdcTools.CdcReader.Transactional/TransactionId.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Numerics; 4 | using System.Text; 5 | 6 | namespace CdcTools.CdcReader.Transactional 7 | { 8 | public class TransactionId 9 | { 10 | public TransactionId(byte[] lsn, string lsnStr, BigInteger lsnInt) 11 | { 12 | Lsn = lsn; 13 | LsnStr = lsnStr; 14 | LsnInt = 
lsnInt; 15 | } 16 | 17 | public byte[] Lsn { get; set; } 18 | public string LsnStr { get; set; } 19 | public BigInteger LsnInt { get; set; } 20 | 21 | public override bool Equals(object obj) 22 | { 23 | var tranId = obj as TransactionId; 24 | if (tranId == null || LsnStr == null) 25 | return false; 26 | 27 | if (LsnStr == null) 28 | return false; 29 | 30 | return LsnStr.Equals(tranId.LsnStr, StringComparison.Ordinal); 31 | } 32 | 33 | public override int GetHashCode() 34 | { 35 | if (LsnStr == null) 36 | return 0; 37 | 38 | return LsnStr.GetHashCode(); 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/CdcTools.CdcReader/Changes/PrimaryKeyValue.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | 6 | namespace CdcTools.CdcReader.Changes 7 | { 8 | public class KeyColumnValue 9 | { 10 | public KeyColumnValue(int ordinalPosition, string columnName, object value) 11 | { 12 | OrdinalPosition = ordinalPosition; 13 | ColumnName = columnName; 14 | Value = value; 15 | } 16 | 17 | public int OrdinalPosition { get; set; } 18 | public string ColumnName { get; set; } 19 | public object Value { get; set; } 20 | } 21 | 22 | public class PrimaryKeyValue 23 | { 24 | public PrimaryKeyValue() 25 | { 26 | Keys = new List(); 27 | } 28 | 29 | public List Keys { get; set; } 30 | 31 | public void AddKeyValue(int ordinalPosition, string column, object value) 32 | { 33 | Keys.Add(new KeyColumnValue(ordinalPosition, column, value)); 34 | } 35 | 36 | public object GetValue(int ordinalPosition) 37 | { 38 | return Keys.Single(x => x.OrdinalPosition == ordinalPosition).Value; 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /environment/CdcApps/Docker/cdc-non-tran-redshift/docker-compose.yml: 
-------------------------------------------------------------------------------- 1 | version: "2" 2 | 3 | services: 4 | CdcToRedshift: 5 | image: "jackvanlightly/cdctools.cdc-to-redshift:latest" 6 | container_name: cdctools.cdc-to-redshift 7 | environment: 8 | CDCTOOLS_IntervalMs: 30000 9 | CDCTOOLS_ExecutionId: my-cdc-streaming-execution 10 | CDCTOOLS_Tables: Person,PersonAddress 11 | CDCTOOLS_Mode: cdc-nontran 12 | CDCTOOLS_NonTransactionalTableBatchSize: 1000 13 | CDCTOOLS_DatabaseConnection: Server=192.168.1.33,1433;Database=CdcToRedshift;User Id=cdc-user;Password=cdc12345; 14 | CDCTOOLS_StateManagmentConnection: Server=192.168.1.33,1433;Database=CdcToRedshift;User Id=cdc-user;Password=cdc12345; 15 | CDCTOOLS_AccessKey: your-access-key-here 16 | CDCTOOLS_SecretAccessKey: your-secret-access-key-here 17 | CDCTOOLS_RedshiftUser: your-redshift-user-name-here 18 | CDCTOOLS_RedshiftPassword: your-redshift-user-password-here 19 | CDCTOOLS_RedshiftRole: your-redshift-role-arn-here 20 | CDCTOOLS_AwsRegion: your-region-here 21 | CDCTOOLS_RedshiftServer: your-redshift-cluster-server 22 | CDCTOOLS_RedshiftPort: 5439 23 | CDCTOOLS_RedshiftDbName: dev 24 | CDCTOOLS_S3BucketName: my-s3-bucket-here 25 | logging: 26 | options: 27 | max-size: 50m -------------------------------------------------------------------------------- /environment/CdcApps/Docker/full-redshift/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | 3 | services: 4 | FullLoadToRedshift: 5 | image: "jackvanlightly/cdctools.cdc-to-redshift:latest" 6 | container_name: cdctools.cdc-to-redshift 7 | environment: 8 | CDCTOOLS_ExecutionId: my-one-off-full-load 9 | CDCTOOLS_Tables: Person,PersonAddress 10 | CDCTOOLS_Mode: full-load 11 | CDCTOOLS_PrintPercentProgressMod: 5 12 | CDCTOOLS_NonTransactionalTableBatchSize: 1000 13 | CDCTOOLS_DatabaseConnection: Server=192.168.1.33,1433;Database=CdcToRedshift;User Id=cdc-user;Password=cdc12345; 14 | 
CDCTOOLS_StateManagmentConnection: Server=192.168.1.33,1433;Database=CdcToRedshift;User Id=cdc-user;Password=cdc12345; 15 | CDCTOOLS_AccessKey: your-access-key-here 16 | CDCTOOLS_SecretAccessKey: your-secret-access-key-here 17 | CDCTOOLS_RedshiftUser: your-redshift-user-name-here 18 | CDCTOOLS_RedshiftPassword: your-redshift-user-password-here 19 | CDCTOOLS_RedshiftRole: your-redshift-role-arn-here 20 | CDCTOOLS_AwsRegion: your-region-hereeg->eu-west-1 21 | CDCTOOLS_RedshiftServer: your-redshift-cluster-server 22 | CDCTOOLS_RedshiftPort: 5439 23 | CDCTOOLS_RedshiftDbName: dev 24 | CDCTOOLS_S3BucketName: my-s3-bucket-here 25 | logging: 26 | options: 27 | max-size: 50m -------------------------------------------------------------------------------- /src/CdcTools.CdcReader/Changes/ChangeRecord.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Numerics; 4 | using System.Text; 5 | 6 | namespace CdcTools.CdcReader.Changes 7 | { 8 | public class ChangeRecord 9 | { 10 | public ChangeRecord() 11 | { 12 | Data = new Dictionary(); 13 | } 14 | 15 | public string TableName { get; set; } 16 | public byte[] Lsn { get; set; } 17 | public string LsnStr { get; set; } 18 | 19 | public byte[] SeqVal { get; set; } 20 | public string SeqValStr { get; set; } 21 | public string ChangeKey { get; set; } 22 | public ChangeType ChangeType { get; set; } 23 | public Dictionary Data { get; set; } 24 | 25 | private BigInteger _lsn; 26 | public BigInteger LsnInt 27 | { 28 | get 29 | { 30 | if (_lsn == 0) 31 | _lsn = BigInteger.Parse(LsnStr); 32 | 33 | return _lsn; 34 | } 35 | } 36 | 37 | private BigInteger _seqVal; 38 | public BigInteger SeqValInt 39 | { 40 | get 41 | { 42 | if (_seqVal == 0) 43 | _seqVal = BigInteger.Parse(SeqValStr); 44 | 45 | return _seqVal; 46 | } 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- 
/environment/CdcApps/Docker/cdc-tran-redshift/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | 3 | services: 4 | CdcToRedshift: 5 | image: "jackvanlightly/cdctools.cdc-to-redshift:latest" 6 | container_name: cdctools.cdc-to-redshift 7 | environment: 8 | CDCTOOLS_IntervalMs: 30000 9 | CDCTOOLS_ExecutionId: my-cdc-streaming-execution 10 | CDCTOOLS_Tables: Person,PersonAddress 11 | CDCTOOLS_Mode: cdc-tran 12 | CDCTOOLS_PerTableBufferLimit: 1000 13 | CDCTOOLS_TransactionBufferLimit: 100 14 | CDCTOOLS_TransactionBatchSizeLimit: 100000 15 | CDCTOOLS_DatabaseConnection: Server=192.168.1.33,1433;Database=CdcToRedshift;User Id=cdc-user;Password=cdc12345; 16 | CDCTOOLS_StateManagmentConnection: Server=192.168.1.33,1433;Database=CdcToRedshift;User Id=cdc-user;Password=cdc12345; 17 | CDCTOOLS_AccessKey: your-access-key-here 18 | CDCTOOLS_SecretAccessKey: your-secret-access-key-here 19 | CDCTOOLS_RedshiftUser: your-redshift-user-name-here 20 | CDCTOOLS_RedshiftPassword: your-redshift-user-password-here 21 | CDCTOOLS_RedshiftRole: your-redshift-role-arn-here 22 | CDCTOOLS_AwsRegion: your-region-here 23 | CDCTOOLS_RedshiftServer: your-redshift-cluster-server 24 | CDCTOOLS_RedshiftPort: 5439 25 | CDCTOOLS_RedshiftDbName: dev 26 | CDCTOOLS_S3BucketName: my-s3-bucket-here 27 | logging: 28 | options: 29 | max-size: 50m -------------------------------------------------------------------------------- /src/CdcTools.Redshift/Changes/RowChange.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Numerics; 4 | using System.Text; 5 | 6 | namespace CdcTools.Redshift.Changes 7 | { 8 | public class RowChange 9 | { 10 | public RowChange() 11 | { 12 | Data = new Dictionary(); 13 | } 14 | 15 | public string Lsn { get; set; } 16 | public string SeqVal { get; set; } 17 | public string ChangeKey { get; set; } 18 | public 
ChangeType ChangeType { get; set; } 19 | public Dictionary Data { get; set; } 20 | 21 | private BigInteger _lsn; 22 | public BigInteger LsnInteger 23 | { 24 | get 25 | { 26 | if (_lsn == 0) 27 | { 28 | if (Lsn != null) 29 | _lsn = BigInteger.Parse(Lsn); 30 | } 31 | 32 | return _lsn; 33 | } 34 | } 35 | 36 | private BigInteger _seqVal; 37 | public BigInteger SeqValInteger 38 | { 39 | get 40 | { 41 | if (_seqVal == 0) 42 | { 43 | if (SeqVal != null) 44 | _seqVal = BigInteger.Parse(SeqVal); 45 | } 46 | 47 | return _seqVal; 48 | } 49 | } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /environment/SourceTables/modify-data.sql: -------------------------------------------------------------------------------- 1 | USE [CdcToRedshift] 2 | GO 3 | 4 | INSERT INTO [dbo].[Person]([PersonId],[FirstName],[Surname],[DateOfBirth]) 5 | VALUES 6 | (1, 'Jack','Hopkins','19970126'), 7 | (2, 'Jon','Burlington','19880123'), 8 | (3, 'James','Hackney','19640521'), 9 | (4, 'Jim','Smith','19551009') 10 | 11 | INSERT INTO [dbo].[Person]([PersonId],[FirstName],[Surname],[DateOfBirth]) 12 | VALUES 13 | (5, 'Kerry','Harry','19780503'), 14 | (6, 'Katie','Jones','19720918'), 15 | (7, 'Kelly','Maguire','20011201'), 16 | (8,'Kathryn','May','19520702') 17 | 18 | DELETE FROM [dbo].[Person] 19 | WHERE FirstName LIKE 'J%' 20 | 21 | UPDATE [dbo].[Person] 22 | SET DateOfBirth = '19700228' 23 | WHERE FirstName = 'Jack' 24 | 25 | INSERT INTO [dbo].[PersonAddress]([AddressId],[Addressline1],[City],[Country],[Postalcode],[PersonId]) 26 | VALUES 27 | (1, '12 Ocean Drive', 'Los Angeles', 'USA', '90210', 1), 28 | (2, '13 Mullholand Drive', 'Los Angeles', 'USA', '92340', 2), 29 | (3, '99 Seaview Road', 'Southampton', 'SO12 4GH', 'UK', 3), 30 | (4, '15 Woodland Close', 'Birmingam', 'BR34 D2R', 'UK', 4) 31 | 32 | INSERT INTO [dbo].[PersonAddress]([AddressId],[Addressline1],[City],[Country],[Postalcode],[PersonId]) 33 | VALUES 34 | (5, '117 Fish Street', 
'Lancaster', 'LA23 4BH', 'UK', 5), 35 | (6, '3 Whale Road', 'Glasgow', 'GL23 6TR', 'UK', 6), 36 | (7, '4 Orca Street', 'Perth', '12345', 'Australia', 7), 37 | (8, '14 Narwhal Drive', 'Sydney', '54321','Australia', 8) 38 | 39 | DELETE FROM [dbo].[PersonAddress] -------------------------------------------------------------------------------- /src/CdcTools.CdcReader/Tables/TableSchema.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | 6 | namespace CdcTools.CdcReader.Tables 7 | { 8 | public class TableSchema 9 | { 10 | public string Schema { get; set; } 11 | public string TableName { get; set; } 12 | public IList Columns { get; set; } 13 | public IList PrimaryKeys { get; set; } 14 | 15 | private Dictionary _columnsDict; 16 | public ColumnSchema GetColumn(string columnName) 17 | { 18 | if (_columnsDict == null) 19 | { 20 | _columnsDict = new Dictionary(); 21 | foreach (var col in Columns) 22 | _columnsDict.Add(col.Name, col); 23 | } 24 | 25 | return _columnsDict[columnName]; 26 | } 27 | 28 | public string GetOrderedPrimaryKeyColumns() 29 | { 30 | if (PrimaryKeys.Count == 1) 31 | return PrimaryKeys.First().ColumnName; 32 | 33 | var sb = new StringBuilder(); 34 | int ctr = 0; 35 | foreach (var pk in PrimaryKeys.OrderBy(x => x.OrdinalPosition)) 36 | { 37 | if (ctr > 0) 38 | sb.Append(","); 39 | 40 | sb.Append(pk.ColumnName); 41 | ctr++; 42 | } 43 | 44 | return sb.ToString(); 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/CdcTools.CdcToKafka.Streaming/Producers/ProducerFactory.cs: -------------------------------------------------------------------------------- 1 | using CdcTools.CdcToKafka.Streaming.Serialization; 2 | using CdcTools.CdcReader.Tables; 3 | using CdcTools.SqlToAvro; 4 | using System; 5 | using System.Collections.Generic; 6 | using System.Text; 7 | 8 | 
internal class ProducerFactory
{
    // Selects the IKafkaProducer implementation matching the requested
    // serialization mode (Avro vs JSON) and whether messages carry a key.
    public static IKafkaProducer GetProducer(string topic,
        TableSchema tableSchema,
        SerializationMode serializationMode,
        bool sendWithKey,
        string kafkaBootstrapServers,
        string schemaRegistryUrl)
    {
        if (serializationMode != SerializationMode.Avro)
        {
            return sendWithKey
                ? (IKafkaProducer)new KeyedJsonProducer(kafkaBootstrapServers, topic)
                : new NonKeyedJsonProducer(kafkaBootstrapServers, topic);
        }

        // Avro producers need a generated schema plus a converter from row
        // changes to Avro generic records.
        var avroSchema = AvroSchemaGenerator.GenerateSchema("CdcToRedshift", tableSchema);
        var typeConverter = new AvroTableTypeConverter(avroSchema);

        return sendWithKey
            ? (IKafkaProducer)new KeyedAvroProducer(kafkaBootstrapServers, schemaRegistryUrl, topic, typeConverter, tableSchema)
            : new NonKeyedAvroProducer(kafkaBootstrapServers, schemaRegistryUrl, topic, typeConverter, tableSchema);
    }
}
public class ColumnSchema
{
    public int OrdinalPosition { get; set; }
    public string Name { get; set; }
    public string DataType { get; set; }
    public int MaxCharsLength { get; set; }
    public bool IsNullable { get; set; }
    public int Precision { get; set; }
    public int Scale { get; set; }

    // Maps the SQL Server data type name (as reported by INFORMATION_SCHEMA)
    // to the .NET type that SqlDataReader materializes for it.
    // Throws for unsupported SQL types.
    public Type GetNetDataType()
    {
        switch (DataType)
        {
            case "char":
            case "nchar":
            case "varchar":
            case "nvarchar":
            case "text":
                return typeof(string);
            case "tinyint":
                return typeof(byte);
            case "smallint":
                return typeof(short);
            case "int":
                return typeof(int);
            case "bigint":
                return typeof(long);
            case "date":
            case "datetime":
            case "datetime2":
                return typeof(DateTime);
            case "time":
                // FIX: was typeof(DateTime). SqlClient returns System.TimeSpan for
                // SQL time, and the Avro converter in this project casts the value
                // to TimeSpan.
                return typeof(TimeSpan);
            case "bit":
                return typeof(bool);
            case "decimal":
            case "numeric":
            case "money":
                return typeof(decimal);
            case "uniqueidentifier":
                return typeof(Guid);
            case "varbinary":
                return typeof(byte[]);
            default:
                throw new Exception("SQL data type not supported: " + DataType);
        }
    }
}
"varbinary": 50 | return typeof(byte[]); 51 | default: 52 | throw new Exception("SQL data type not supported: " + DataType); 53 | } 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/CdcTools.CdcReader/Tables/TableSchemaQueryBuilder.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | 6 | namespace CdcTools.CdcReader.Tables 7 | { 8 | internal class TableSchemaQueryBuilder 9 | { 10 | public const string GetColumnsQuery = @"SELECT TABLE_SCHEMA 11 | ,TABLE_NAME 12 | ,COLUMN_NAME 13 | ,ORDINAL_POSITION 14 | ,DATA_TYPE 15 | ,CHARACTER_MAXIMUM_LENGTH 16 | ,IS_NULLABLE 17 | ,NUMERIC_SCALE 18 | ,NUMERIC_PRECISION 19 | FROM INFORMATION_SCHEMA.COLUMNS"; 20 | 21 | public static string GetColumnsOfTableQuery() 22 | { 23 | return GetColumnsQuery 24 | + Environment.NewLine + "WHERE TABLE_NAME = @TableName"; 25 | } 26 | 27 | public const string GetPrimaryKeysQuery = @"SELECT OBJECT_NAME(IC.OBJECT_ID) As TableName 28 | ,COL_NAME(IC.OBJECT_ID, IC.COLUMN_ID) AS ColumnName 29 | ,IC.KEY_ORDINAL AS OrdinalPosition 30 | FROM SYS.INDEXES AS I 31 | JOIN SYS.INDEX_COLUMNS AS IC ON I.OBJECT_ID = IC.OBJECT_ID 32 | AND I.INDEX_ID = IC.INDEX_ID 33 | WHERE I.IS_PRIMARY_KEY = 1"; 34 | 35 | public static string GetPrimaryKeyColumnsOfTableQuery() 36 | { 37 | return GetPrimaryKeysQuery + Environment.NewLine + "AND OBJECT_NAME(IC.OBJECT_ID) = @TableName"; 38 | } 39 | 40 | public static string GetExtractQueryUsingAllKeys(TableSchema tableSchema, int batchSize) 41 | { 42 | var sb = new StringBuilder(); 43 | sb.Append($"SELECT TOP {batchSize} * FROM {tableSchema.Schema}.{tableSchema.TableName}"); 44 | sb.Append(" WHERE "); 45 | 46 | foreach (var pk in tableSchema.PrimaryKeys.OrderBy(x => x.OrdinalPosition)) 47 | { 48 | if (pk.OrdinalPosition > 1) 49 | sb.Append(" AND "); 50 | sb.Append(pk.ColumnName); 51 | 
sb.Append(" > @p" + pk.OrdinalPosition); 52 | } 53 | 54 | sb.Append(" OPTION(RECOMPILE)"); 55 | 56 | return sb.ToString(); 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /environment/CdcApps/Docker/cdc-kafka-redshift/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | 3 | services: 4 | CdcToKafka: 5 | image: "jackvanlightly/cdctools.cdc-to-kafka-streaming:latest" 6 | container_name: cdctools.cdc-to-kafka-streaming 7 | environment: 8 | CDCTOOLS_IntervalMs: 30000 9 | CDCTOOLS_TableTopicPrefix: cdc-table- 10 | CDCTOOLS_Tables: Person,PersonAddress 11 | CDCTOOLS_Mode: cdc 12 | CDCTOOLS_SerializationMode: Avro 13 | CDCTOOLS_SendWithKey: 'false' 14 | CDCTOOLS_NonTransactionalTableBatchSize: 1000 15 | CDCTOOLS_DatabaseConnection: Server=192.168.1.33,1433;Database=CdcToRedshift;User Id=cdc-user;Password=cdc12345; 16 | CDCTOOLS_StateManagmentConnection: Server=192.168.1.33,1433;Database=CdcToRedshift;User Id=cdc-user;Password=cdc12345; 17 | CDCTOOLS_KafkaBootstrapServers: kafkaserver:9092 18 | CDCTOOLS_KafkaSchemaRegistryUrl: http://schema-registry:8081 19 | logging: 20 | options: 21 | max-size: 50m 22 | KafkaToRedshift: 23 | image: "jackvanlightly/cdctools.kafka-to-redshift:latest" 24 | container_name: cdctools.kafka-to-redshift 25 | environment: 26 | CDCTOOLS_AccessKey: your-access-key-here 27 | CDCTOOLS_SecretAccessKey: your-secret-access-key-here 28 | CDCTOOLS_RedshiftUser: your-redshift-user-name-here 29 | CDCTOOLS_RedshiftPassword: your-redshift-user-password-here 30 | CDCTOOLS_RedshiftRole: your-redshift-role-arn-here 31 | CDCTOOLS_AwsRegion: your-region-hereeg->eu-west-1 32 | CDCTOOLS_RedshiftServer: your-redshift-cluster-server 33 | CDCTOOLS_RedshiftPort: 5439 34 | CDCTOOLS_RedshiftDbName: dev 35 | CDCTOOLS_S3BucketName: my-s3-bucket-here 36 | CDCTOOLS_WindowMs: 30000 37 | CDCTOOLS_WindowItems: 5000 38 | CDCTOOLS_TableTopicPrefix: 
public class NonKeyedJsonProducer : ProducerBase, IKafkaProducer
{
    private Producer<Null, string> _producer;
    private bool _disposed;

    // JSON producer that publishes change records without a message key.
    public NonKeyedJsonProducer(string bootstrapServers, string topic)
        : base(topic)
    {
        _config = new Dictionary<string, object>
        {
            { "bootstrap.servers", bootstrapServers }
        };

        // workaround for https://github.com/confluentinc/confluent-kafka-dotnet/issues/501
        if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
            _config.Add("socket.blocking.max.ms", "1");

        _producer = new Producer<Null, string>(_config, null, new StringSerializer(Encoding.UTF8));
    }

    // Serializes the change to JSON and publishes it, retrying every 100ms
    // until the send succeeds or cancellation is requested.
    public async Task SendAsync(CancellationToken token, ChangeRecord changeRecord)
    {
        var jsonText = JsonConvert.SerializeObject(Convert(changeRecord));

        while (!token.IsCancellationRequested)
        {
            var sendResult = await _producer.ProduceAsync(topic: _topic, key: null, val: jsonText, blockIfQueueFull: true);
            if (!sendResult.Error.HasError)
                return;

            Console.WriteLine("Could not send: " + sendResult.Error.Reason);
            await Task.Delay(100);
        }
    }

    public void Dispose()
    {
        if (_disposed)
            return;

        _producer?.Dispose();
        _disposed = true;
    }
}
public class KeyedJsonProducer : ProducerBase, IKafkaProducer
{
    private Producer<string, string> _producer;
    private bool _disposed;

    // JSON producer that publishes change records keyed by the change key,
    // so all changes for the same row land on the same partition.
    public KeyedJsonProducer(string bootstrapServers, string topic)
        : base(topic)
    {
        _config = new Dictionary<string, object>
        {
            { "bootstrap.servers", bootstrapServers }
        };

        // workaround for https://github.com/confluentinc/confluent-kafka-dotnet/issues/501
        if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
            _config.Add("socket.blocking.max.ms", "1");

        _producer = new Producer<string, string>(_config, new StringSerializer(Encoding.UTF8), new StringSerializer(Encoding.UTF8));
    }

    // Serializes the change to JSON and publishes it under its change key,
    // retrying every 100ms until the send succeeds or cancellation is requested.
    public async Task SendAsync(CancellationToken token, ChangeRecord changeRecord)
    {
        var change = Convert(changeRecord);
        var jsonText = JsonConvert.SerializeObject(change);

        while (!token.IsCancellationRequested)
        {
            var sendResult = await _producer.ProduceAsync(topic: _topic, key: change.ChangeKey, val: jsonText, blockIfQueueFull: true);
            if (!sendResult.Error.HasError)
                return;

            Console.WriteLine("Could not send: " + sendResult.Error.Reason);
            await Task.Delay(100);
        }
    }

    public void Dispose()
    {
        if (_disposed)
            return;

        _producer?.Dispose();
        _disposed = true;
    }
}
public class MessageProxy<T>
{
    // Commit callback captured at construction; one of the four consumer
    // variants below binds it, so it is never null when CommitAsync is called.
    private readonly Func<Task> _commit;

    public MessageProxy(Consumer<Null, string> consumer, Message<Null, string> message)
    {
        _commit = () => consumer.CommitAsync(message);
    }

    public MessageProxy(Consumer<string, string> consumer, Message<string, string> message)
    {
        _commit = () => consumer.CommitAsync(message);
    }

    public MessageProxy(Consumer<Null, GenericRecord> consumer, Message<Null, GenericRecord> message)
    {
        _commit = () => consumer.CommitAsync(message);
    }

    public MessageProxy(Consumer<string, GenericRecord> consumer, Message<string, GenericRecord> message)
    {
        _commit = () => consumer.CommitAsync(message);
    }

    // The deserialized payload carried alongside the raw Kafka message.
    public T Payload { set; get; }

    // Commits the wrapped message's offset on whichever consumer produced it.
    public Task CommitAsync() => _commit();
}
public class NonKeyedAvroProducer : ProducerBase, IKafkaProducer
{
    private AvroTableTypeConverter _avroTypeConverter;
    private TableSchema _tableSchema;
    private Producer<Null, GenericRecord> _producer;
    private bool _disposed;

    // Avro producer that publishes change records without a message key,
    // registering schemas against the configured schema registry.
    public NonKeyedAvroProducer(string bootstrapServers, string schemaRegistryUrl, string topic, AvroTableTypeConverter avroTableTypeConverter, TableSchema tableSchema)
        : base(topic)
    {
        _config = new Dictionary<string, object>
        {
            { "bootstrap.servers", bootstrapServers },
            { "schema.registry.url", schemaRegistryUrl }
        };

        // workaround for https://github.com/confluentinc/confluent-kafka-dotnet/issues/501
        if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
            _config.Add("socket.blocking.max.ms", "1");

        _producer = new Producer<Null, GenericRecord>(_config, null, new AvroSerializer<GenericRecord>());
        _avroTypeConverter = avroTableTypeConverter;
        _tableSchema = tableSchema;
    }

    // Converts the change to an Avro record and publishes it, retrying every
    // 100ms until the send succeeds or cancellation is requested.
    public async Task SendAsync(CancellationToken token, ChangeRecord changeRecord)
    {
        var record = _avroTypeConverter.GetRecord(_tableSchema, Convert(changeRecord));

        while (!token.IsCancellationRequested)
        {
            var sendResult = await _producer.ProduceAsync(topic: _topic, key: null, val: record, blockIfQueueFull: true);
            if (!sendResult.Error.HasError)
                return;

            Console.WriteLine("Could not send: " + sendResult.Error.Reason);
            await Task.Delay(100);
        }
    }

    public void Dispose()
    {
        if (_disposed)
            return;

        _producer?.Dispose();
        _disposed = true;
    }
}
public class KeyedAvroProducer : ProducerBase, IKafkaProducer
{
    private AvroTableTypeConverter _avroTypeConverter;
    private TableSchema _tableSchema;
    private Producer<string, GenericRecord> _producer;
    private bool _disposed;

    // Avro producer that publishes change records keyed by the change key,
    // so all changes for the same row land on the same partition.
    public KeyedAvroProducer(string bootstrapServers, string schemaRegistryUrl, string topic, AvroTableTypeConverter avroTableTypeConverter, TableSchema tableSchema)
        : base(topic)
    {
        _config = new Dictionary<string, object>
        {
            { "bootstrap.servers", bootstrapServers },
            { "schema.registry.url", schemaRegistryUrl }
        };

        // workaround for https://github.com/confluentinc/confluent-kafka-dotnet/issues/501
        if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
            _config.Add("socket.blocking.max.ms", "1");

        _producer = new Producer<string, GenericRecord>(_config, new StringSerializer(Encoding.UTF8), new AvroSerializer<GenericRecord>());
        _avroTypeConverter = avroTableTypeConverter;
        _tableSchema = tableSchema;
    }

    // Converts the change to an Avro record and publishes it under its change
    // key, retrying every 100ms until the send succeeds or cancellation is requested.
    public async Task SendAsync(CancellationToken token, ChangeRecord changeRecord)
    {
        var change = Convert(changeRecord);
        var record = _avroTypeConverter.GetRecord(_tableSchema, change);

        while (!token.IsCancellationRequested)
        {
            var sendResult = await _producer.ProduceAsync(topic: _topic, key: change.ChangeKey, val: record, blockIfQueueFull: true);
            if (!sendResult.Error.HasError)
                return;

            Console.WriteLine("Could not send: " + sendResult.Error.Reason);
            await Task.Delay(100);
        }
    }

    public void Dispose()
    {
        if (_disposed)
            return;

        _producer?.Dispose();
        _disposed = true;
    }
}
public class RedshiftWriter : IRedshiftWriter
{
    private RedshiftClient _redshiftClient;

    public RedshiftWriter(RedshiftClient redshiftClient)
    {
        _redshiftClient = redshiftClient;
    }

    // Pre-caches the target tables' column metadata on the Redshift client.
    public async Task CacheTableColumnsAsync(List<string> tables)
    {
        await _redshiftClient.CacheTableColumnsAsync(tables);
    }

    // Starts the batching loop for a single table. Runs until the token is
    // cancelled or the source collection is marked complete.
    public async Task StartWritingAsync(CancellationToken token, TimeSpan windowSizePeriod, int windowSizeItems, string table, BlockingCollection<MessageProxy<RowChange>> accumulatedChanges)
    {
        await WriteToRedshiftAsync(token, windowSizePeriod, windowSizeItems, table, accumulatedChanges);
    }

    // Drains buffered changes in windows (bounded by time and item count),
    // uploads each window via the Redshift client, then commits the Kafka
    // offset of the last message so the whole window is acknowledged at once.
    private async Task WriteToRedshiftAsync(CancellationToken token, TimeSpan windowSize, int windowSizeItems, string tableName, BlockingCollection<MessageProxy<RowChange>> accumulatedChanges)
    {
        tableName = tableName.ToLower();

        while (!token.IsCancellationRequested && !accumulatedChanges.IsAddingCompleted)
        {
            // create change window
            var messages = EmptyBuffer(accumulatedChanges);
            if (messages.Any())
            {
                var changesToPut = messages.Select(x => x.Payload).ToList();

                // upload change window to S3 then Redshift
                await _redshiftClient.UploadAsCsvAsync(tableName, changesToPut);

                // commit the last message in the batch
                await messages.Last().CommitAsync();
            }

            // wait for the window interval, checking the buffered item count each
            // second; cut the window early if the max item count is reached
            int secondsWaited = 0;
            while (secondsWaited < (int)windowSize.TotalSeconds)
            {
                // FIX: observe cancellation during the window wait; previously a
                // shutdown request could stall here for up to the full window period.
                if (token.IsCancellationRequested)
                    break;

                if (accumulatedChanges.Count >= windowSizeItems)
                    break;

                secondsWaited++;
                await Task.Delay(1000);
            }
        }
    }

    // Takes everything currently buffered without blocking.
    private List<MessageProxy<RowChange>> EmptyBuffer(BlockingCollection<MessageProxy<RowChange>> accumulatedChanges)
    {
        var changesToPut = new List<MessageProxy<RowChange>>();

        MessageProxy<RowChange> change = null;
        while (accumulatedChanges.TryTake(out change))
            changesToPut.Add(change);

        return changesToPut;
    }
}
public class AvroSchemaGenerator
{
    // Builds an Avro record schema (as JSON text) for a SQL table, prefixed
    // with the CDC bookkeeping fields Lsn, SeqVal, ChangeKey and ChangeType.
    // Throws for SQL column types with no Avro mapping.
    public static string GenerateSchema(string schemaNamespace, TableSchema tableSchema)
    {
        var schema = new JObject();
        schema["namespace"] = schemaNamespace;
        schema["name"] = tableSchema.TableName;
        schema["type"] = "record";

        var fields = new JArray();

        var lsnField = new JObject();
        lsnField["name"] = "Lsn";
        lsnField["type"] = "string";
        fields.Add(lsnField);

        var seqValField = new JObject();
        seqValField["name"] = "SeqVal";
        seqValField["type"] = "string";
        fields.Add(seqValField);

        var keyField = new JObject();
        keyField["name"] = "ChangeKey";
        keyField["type"] = "string";
        fields.Add(keyField);

        var changeTypeField = new JObject();
        changeTypeField["name"] = "ChangeType";
        changeTypeField["type"] = "int";
        fields.Add(changeTypeField);

        foreach (var column in tableSchema.Columns.OrderBy(x => x.OrdinalPosition))
        {
            var field = new JObject();
            field["name"] = column.Name;

            switch (column.DataType)
            {
                case "varchar":
                case "nvarchar":
                case "char":
                case "nchar":
                case "text":
                case "uniqueidentifier":
                    field["type"] = "string";
                    break;
                case "bigint":
                    field["type"] = "long";
                    break;
                case "binary":
                case "varbinary":
                case "image":
                    field["type"] = "bytes";
                    break;
                case "decimal":
                case "money": // FIX: money is supported by ColumnSchema but was silently skipped here
                    field["type"] = "bytes";
                    field["logicalType"] = "decimal";
                    field["precision"] = column.Precision;
                    field["scale"] = column.Scale;
                    break;
                case "bit":
                    // FIX: bit was silently skipped, emitting a field with no "type"
                    // and therefore an unparseable schema.
                    field["type"] = "boolean";
                    break;
                case "tinyint":
                case "smallint":
                case "int":
                    field["type"] = "int";
                    break;
                case "date":
                    field["type"] = "int";
                    field["logicalType"] = "date";
                    break;
                case "datetime":
                    field["type"] = "long";
                    field["logicalType"] = "timestamp-millis";
                    break;
                case "datetime2":
                    field["type"] = "long";
                    field["logicalType"] = "timestamp-micros";
                    break;
                case "time":
                    // FIX: was long/timestamp-millis, but the producer-side converter
                    // writes an int millisecond count and the consumer-side converter
                    // reads int via its "time-millis" case — long/timestamp-millis
                    // failed at serialization time and had no matching reader branch.
                    field["type"] = "int";
                    field["logicalType"] = "time-millis";
                    break;
                default:
                    // FIX: previously an unmapped type fell through, producing a
                    // field with no "type" and an invalid schema; fail fast instead.
                    throw new Exception("SQL data type not supported: " + column.DataType);
            }

            fields.Add(field);
        }

        schema["fields"] = fields;

        return schema.ToString();
    }
}
public class AvroTableTypeConverter
{
    private DateTime Epoch = new DateTime(1970, 1, 1);
    private string _schemaText;
    private RecordSchema _schema;
    private JObject _schemaJson;
    private Dictionary<string, string> _fieldConversions;

    // Parses the record schema once and remembers, per field, which logical-type
    // conversion (empty string = none) applies when mapping Avro values to .NET.
    public AvroTableTypeConverter(RecordSchema schema)
    {
        _schema = schema;
        _schemaText = schema.ToString();
        _schemaJson = JObject.Parse(_schemaText);

        _fieldConversions = new Dictionary<string, string>();
        foreach (var field in (JArray)_schemaJson["fields"])
        {
            var fieldName = field["name"].Value<string>();
            var hasLogicalType = field.Children().Any(x => x.Path.EndsWith("logicalType"));
            _fieldConversions[fieldName] = hasLogicalType ? field["logicalType"].Value<string>() : "";
        }
    }

    // Maps an Avro generic record back to a RowChange, converting logical-typed
    // fields to their .NET equivalents.
    // NOTE(review): assumes RowChange initializes its Data dictionary in its
    // constructor — confirm against CdcTools.Redshift.Changes.RowChange.
    public RowChange GetRowChange(GenericRecord record)
    {
        var change = new RowChange();
        change.Lsn = (string)record["Lsn"];
        change.SeqVal = (string)record["SeqVal"];
        change.ChangeKey = (string)record["ChangeKey"];
        change.ChangeType = (ChangeType)(int)record["ChangeType"];

        foreach (var pair in _fieldConversions)
        {
            var fieldName = pair.Key;
            switch (pair.Value)
            {
                case "":
                    change.Data.Add(fieldName, record[fieldName]);
                    break;
                case "decimal":
                    // Decoded as an IEEE double — this mirrors the producer-side
                    // encoder in this project (BitConverter.GetBytes of a double),
                    // not the standard Avro decimal (unscaled big-endian) layout.
                    var rawBytes = (byte[])record[fieldName];
                    change.Data.Add(fieldName, Convert.ToDecimal(BitConverter.ToDouble(rawBytes, 0)));
                    break;
                case "date":
                    change.Data.Add(fieldName, new DateTime(1970, 1, 1).AddDays((int)record[fieldName]));
                    break;
                case "timestamp-millis":
                    change.Data.Add(fieldName, new DateTime(1970, 1, 1).AddMilliseconds((long)record[fieldName]));
                    break;
                case "timestamp-micros":
                    // 1 microsecond == 10 ticks
                    change.Data.Add(fieldName, new DateTime(Epoch.Ticks + ((long)record[fieldName] * 10)));
                    break;
                case "time-millis":
                    change.Data.Add(fieldName, TimeSpan.FromMilliseconds((int)record[fieldName]));
                    break;
                case "time-micros":
                    change.Data.Add(fieldName, TimeSpan.FromTicks((long)record[fieldName] * 10));
                    break;
            }
        }

        return change;
    }

    // True when the supplied schema is the same one this converter was built from.
    public bool SchemaMatches(RecordSchema schema) => _schema.Equals(schema);
}
public class AvroTableTypeConverter
{
    private DateTime Epoch = new DateTime(1970, 1, 1);
    private RecordSchema _schema;
    private JObject _schemaJson;
    private Dictionary<string, string> _fieldConversions;

    // Parses the Avro schema text once and remembers, per field, which
    // logical-type conversion (empty string = none) applies.
    public AvroTableTypeConverter(string schema)
    {
        _schema = (RecordSchema)Schema.Parse(schema);
        _schemaJson = JObject.Parse(schema);

        _fieldConversions = new Dictionary<string, string>();
        foreach (var field in (JArray)_schemaJson["fields"])
        {
            var fieldName = field["name"].Value<string>();
            var hasLogicalType = field.Children().Any(x => x.Path.EndsWith("logicalType"));
            _fieldConversions[fieldName] = hasLogicalType ? field["logicalType"].Value<string>() : "";
        }
    }

    // Builds an Avro generic record from a row change: the CDC bookkeeping
    // fields first, then each table column converted to its Avro representation.
    public GenericRecord GetRecord(TableSchema tableSchema, RowChange tableChange)
    {
        var record = new GenericRecord(_schema);
        record.Add("Lsn", tableChange.Lsn);
        record.Add("SeqVal", tableChange.SeqVal);
        record.Add("ChangeKey", tableChange.ChangeKey);
        record.Add("ChangeType", (int)tableChange.ChangeType);

        foreach (var column in tableSchema.Columns.OrderBy(x => x.OrdinalPosition))
        {
            // NOTE(review): assumes the Data dictionary is keyed by lower-cased
            // column name and contains an entry for every column — a missing or
            // NULL value throws here; confirm upstream guarantees.
            var columnValue = tableChange.Data[column.Name.ToLower()];

            switch (column.DataType)
            {
                case "varchar":
                case "nvarchar":
                case "char":
                case "nchar":
                case "bigint":
                case "binary":
                case "varbinary":
                case "text":
                case "image":
                    record.Add(column.Name, columnValue);
                    break;
                case "uniqueidentifier":
                    record.Add(column.Name, ((Guid)columnValue).ToString());
                    break;
                case "decimal":
                    // Encoded as an IEEE double — matches the decoder in the
                    // Redshift-side converter, not standard Avro decimal layout.
                    record.Add(column.Name, BitConverter.GetBytes(Convert.ToDouble((decimal)columnValue)));
                    break;
                case "tinyint":
                    record.Add(column.Name, (int)(byte)columnValue);
                    break;
                case "smallint":
                    record.Add(column.Name, (int)(short)columnValue);
                    break;
                case "int":
                    record.Add(column.Name, (int)columnValue);
                    break;
                case "date":
                    record.Add(column.Name, (int)((DateTime)columnValue - Epoch).TotalDays);
                    break;
                case "datetime":
                    record.Add(column.Name, (long)((DateTime)columnValue - Epoch).TotalMilliseconds);
                    break;
                case "datetime2":
                    // microseconds since epoch (1 microsecond == 10 ticks)
                    record.Add(column.Name, (((DateTime)columnValue).Ticks - Epoch.Ticks) / 10);
                    break;
                case "time":
                    record.Add(column.Name, (int)((TimeSpan)columnValue).TotalMilliseconds);
                    break;
                default:
                    throw new Exception("Unsupported type: " + column.DataType);
            }
        }

        return record;
    }
}
public class CdcReaderClient
{
    private ICdcRepository _cdcRepository;
    private ITableSchemaRepository _tableSchemaRepository;
    private IFullLoadRepository _fullLoadRepository;
    private IStateManager _stateManager;

    // Facade over the CDC, table-schema, full-load and state repositories.
    // Any dependency passed as null falls back to its default SQL implementation
    // built from the supplied connection strings.
    public CdcReaderClient(string connectionString,
        string stateManagementConnectionString,
        ICdcRepository cdcRepository = null,
        ITableSchemaRepository tableSchemaRepository = null,
        IFullLoadRepository fullLoadRepository = null,
        IStateManager stateManager = null)
    {
        _cdcRepository = cdcRepository ?? new CdcRepository(connectionString);
        _tableSchemaRepository = tableSchemaRepository ?? new TableSchemaRepository(connectionString);
        _fullLoadRepository = fullLoadRepository ?? new FullLoadRepository(connectionString);
        _stateManager = stateManager ?? new StateManager(stateManagementConnectionString);
    }

    public Task<byte[]> GetMinValidLsnAsync(string tableName)
        => _cdcRepository.GetMinValidLsnAsync(tableName);

    public Task<byte[]> GetMaxLsnAsync()
        => _cdcRepository.GetMaxLsnAsync();

    public Task<ChangeBatch> GetChangeBatchAsync(TableSchema tableSchema, byte[] fromLsn, byte[] fromSeqVal, byte[] toLsn, int batchSize)
        => _cdcRepository.GetChangeBatchAsync(tableSchema, fromLsn, fromSeqVal, toLsn, batchSize);

    public Task<ChangeBatch> GetChangeBatchAsync(TableSchema tableSchema, byte[] fromLsn, byte[] toLsn, int batchSize)
        => _cdcRepository.GetChangeBatchAsync(tableSchema, fromLsn, toLsn, batchSize);

    public Task<TableSchema> GetTableSchemaAsync(string tableName)
        => _tableSchemaRepository.GetTableSchemaAsync(tableName);

    public Task<FullLoadBatch> GetFirstBatchAsync(TableSchema tableSchema, int batchSize)
        => _fullLoadRepository.GetFirstBatchAsync(tableSchema, batchSize);

    public Task<FullLoadBatch> GetBatchAsync(TableSchema tableSchema, PrimaryKeyValue lastRetrievedKey, int batchSize)
        => _fullLoadRepository.GetBatchAsync(tableSchema, lastRetrievedKey, batchSize);

    // NOTE(review): return element type reconstructed as long — confirm against
    // IFullLoadRepository.GetRowCountAsync (could be int).
    public Task<long> GetRowCountAsync(TableSchema tableSchema)
        => _fullLoadRepository.GetRowCountAsync(tableSchema);

    public Task<StateResult<Offset>> GetLastCdcOffsetAsync(string executionId, string tableName)
        => _stateManager.GetLastCdcOffsetAsync(executionId, tableName);

    public Task StoreCdcOffsetAsync(string executionId, string tableName, Offset offset)
        => _stateManager.StoreCdcOffsetAsync(executionId, tableName, offset);

    public Task<StateResult<PrimaryKeyValue>> GetLastFullLoadOffsetAsync(string executionId, string tableName)
        => _stateManager.GetLastPkOffsetAsync(executionId, tableName);

    public Task StoreFullLoadOffsetAsync(string executionId, string tableName, PrimaryKeyValue pkValue)
        => _stateManager.StorePkOffsetAsync(executionId, tableName, pkValue);
}
fromLsn, toLsn, batchSize); 63 | } 64 | 65 | public async Task GetTableSchemaAsync(string tableName) 66 | { 67 | return await _tableSchemaRepository.GetTableSchemaAsync(tableName); 68 | } 69 | 70 | public async Task GetFirstBatchAsync(TableSchema tableSchema, int batchSize) 71 | { 72 | return await _fullLoadRepository.GetFirstBatchAsync(tableSchema, batchSize); 73 | } 74 | 75 | public async Task GetBatchAsync(TableSchema tableSchema, PrimaryKeyValue lastRetrievedKey, int batchSize) 76 | { 77 | return await _fullLoadRepository.GetBatchAsync(tableSchema, lastRetrievedKey, batchSize); 78 | } 79 | 80 | public async Task GetRowCountAsync(TableSchema tableSchema) 81 | { 82 | return await _fullLoadRepository.GetRowCountAsync(tableSchema); 83 | } 84 | 85 | public async Task> GetLastCdcOffsetAsync(string executionId, string tableName) 86 | { 87 | return await _stateManager.GetLastCdcOffsetAsync(executionId, tableName); 88 | } 89 | 90 | public async Task StoreCdcOffsetAsync(string executionId, string tableName, Offset offset) 91 | { 92 | await _stateManager.StoreCdcOffsetAsync(executionId, tableName, offset); 93 | } 94 | 95 | public async Task> GetLastFullLoadOffsetAsync(string executionId, string tableName) 96 | { 97 | return await _stateManager.GetLastPkOffsetAsync(executionId, tableName); 98 | } 99 | 100 | public async Task StoreFullLoadOffsetAsync(string executionId, string tableName, PrimaryKeyValue pkValue) 101 | { 102 | await _stateManager.StorePkOffsetAsync(executionId, tableName, pkValue); 103 | } 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /src/CdcTools.CdcReader.Transactional/State/StateManager.cs: -------------------------------------------------------------------------------- 1 | using System.Threading.Tasks; 2 | using System.Data; 3 | using System.Linq; 4 | using System.Numerics; 5 | using System.Collections.Generic; 6 | using System.Data.SqlClient; 7 | 8 | namespace 
namespace CdcTools.CdcReader.Transactional.State
{
    /// <summary>
    /// Persists and restores the last processed transaction LSN per execution id,
    /// backed by the [CdcTools].[TransactionState] table.
    /// </summary>
    public class StateManager : IStateManager
    {
        private readonly string _connString;
        // NOTE(review): declared but not referenced anywhere in this class — kept
        // for interface/binary compatibility; candidate for removal.
        private byte[] _noCdcDataLsn = new byte[10];
        // Execution ids for which a row is known to exist, so subsequent stores
        // can go straight to UPDATE without a lookup.
        private readonly HashSet<string> _cdcSeen;

        public StateManager(string connectionString)
        {
            _connString = connectionString;
            _cdcSeen = new HashSet<string>();
        }

        /// <summary>
        /// Upserts the transaction id for an execution: INSERT on first store,
        /// UPDATE thereafter. Not thread-safe (unsynchronized HashSet).
        /// </summary>
        public async Task StoreTransactionIdAsync(string executionId, TransactionId transactionId)
        {
            if (_cdcSeen.Contains(executionId))
            {
                await UpdateStoreTransactionIdAsync(executionId, transactionId);
            }
            else
            {
                var result = await GetLastTransactionIdAsync(executionId);
                if (result.Result == Result.NoStoredTransationId)
                {
                    await InsertStoreTransactionIdAsync(executionId, transactionId);
                    _cdcSeen.Add(executionId);
                }
                else
                {
                    await UpdateStoreTransactionIdAsync(executionId, transactionId);
                }
            }
        }

        // Inserts the first state row for this execution id.
        private async Task InsertStoreTransactionIdAsync(string executionId, TransactionId transactionId)
        {
            using (var conn = await GetConnectionAsync())
            using (var command = conn.CreateCommand())
            {
                command.CommandText = @"INSERT INTO [CdcTools].[TransactionState]([ExecutionId],[Lsn],[LastUpdate])
VALUES(@ExecutionId,@Lsn,GETUTCDATE())";
                command.Parameters.Add("ExecutionId", SqlDbType.VarChar, 50).Value = executionId;
                command.Parameters.Add("Lsn", SqlDbType.Binary, 10).Value = transactionId.Lsn;
                await command.ExecuteNonQueryAsync();
            }
        }

        // Overwrites the stored LSN for an existing execution id.
        private async Task UpdateStoreTransactionIdAsync(string executionId, TransactionId transactionId)
        {
            using (var conn = await GetConnectionAsync())
            using (var command = conn.CreateCommand())
            {
                command.CommandText = @"UPDATE [CdcTools].[TransactionState]
SET [Lsn] = @Lsn,
    [LastUpdate] = GETUTCDATE()
WHERE ExecutionId = @ExecutionId";
                command.Parameters.Add("ExecutionId", SqlDbType.VarChar, 50).Value = executionId;
                command.Parameters.Add("Lsn", SqlDbType.Binary, 10).Value = transactionId.Lsn;
                await command.ExecuteNonQueryAsync();
            }
        }

        /// <summary>
        /// Returns the stored transaction id for the execution, or a zero LSN
        /// with Result.NoStoredTransationId when no row exists yet.
        /// </summary>
        public async Task<StateResult<TransactionId>> GetLastTransactionIdAsync(string executionId)
        {
            using (var conn = await GetConnectionAsync())
            {
                var command = conn.CreateCommand();
                command.CommandText = "SELECT TOP 1 Lsn FROM [CdcTools].[TransactionState] WHERE ExecutionId = @ExecutionId";
                command.Parameters.Add("ExecutionId", SqlDbType.VarChar, 50).Value = executionId;

                using (var reader = await command.ExecuteReaderAsync())
                {
                    // was reader.Read(): use the async variant inside an async method
                    // to avoid a sync-over-async block on the connection.
                    if (await reader.ReadAsync())
                    {
                        var lsn = (byte[])reader["Lsn"];
                        // SQL Server LSNs are big-endian; BigInteger expects little-endian bytes.
                        var lsnInt = new BigInteger(lsn.Reverse().ToArray());
                        var lsnStr = lsnInt.ToString();
                        var id = new TransactionId(lsn, lsnStr, lsnInt);

                        return new StateResult<TransactionId>(Result.TransactionIdReturned, id);
                    }
                    else
                    {
                        var lsn = new byte[10];
                        var lsnInt = BigInteger.Zero;
                        var lsnStr = "0";
                        var id = new TransactionId(lsn, lsnStr, lsnInt);

                        return new StateResult<TransactionId>(Result.NoStoredTransationId, id);
                    }
                }
            }
        }

        // Opens a new connection; caller owns disposal.
        private async Task<SqlConnection> GetConnectionAsync()
        {
            var conn = new SqlConnection(_connString);
            await conn.OpenAsync();

            return conn;
        }
    }
}
namespace CdcTools.Redshift
{
    /// <summary>
    /// Uploads row changes to S3 as CSV documents and asks the Redshift DAO to
    /// merge them. Supports single-shot uploads and multi-part uploads that are
    /// accumulated under a tag and committed together.
    /// </summary>
    public class RedshiftClient
    {
        private readonly RedshiftConfiguration _configuration;
        private readonly IRedshiftDao _redshiftDao;
        private readonly IS3Uploader _s3Uploader;

        // Multi-part tag -> S3 documents accumulated so far. Guarded by _cacheSyncRoot.
        private readonly Dictionary<string, List<S3TableDocuments>> _cachedMultiPartDocumentPaths;
        private readonly object _cacheSyncRoot = new object();

        public RedshiftClient(RedshiftConfiguration configuration,
            IRedshiftDao redshiftDao = null,
            IS3Uploader s3Uploader = null)
        {
            _configuration = configuration;
            _redshiftDao = redshiftDao ?? new RedshiftDao(configuration);
            _s3Uploader = s3Uploader ?? new S3Uploader(configuration.S3BucketName);
            _cachedMultiPartDocumentPaths = new Dictionary<string, List<S3TableDocuments>>();
        }

        /// <summary>Pre-loads Redshift column metadata for the given tables.</summary>
        public async Task CacheTableColumnsAsync(List<string> tableNames)
        {
            await _redshiftDao.LoadTableColumnsAsync(tableNames);
        }

        /// <summary>Uploads one table's changes to S3 and merges them into Redshift.</summary>
        public async Task UploadAsCsvAsync(string tableName, List<RowChange> rowChanges)
        {
            tableName = tableName.ToLower();
            var s3TableDocs = await LoadToS3Async(tableName, rowChanges);
            await _redshiftDao.PerformCsvMergeAsync(s3TableDocs);
        }

        /// <summary>Uploads multiple tables' changes to S3, then merges them in one call.</summary>
        public async Task UploadAsCsvAsync(Dictionary<string, List<RowChange>> tableRowChanges)
        {
            var s3TableDocsList = new List<S3TableDocuments>();
            foreach (var kv in tableRowChanges)
            {
                var s3TableDocs = await LoadToS3Async(kv.Key.ToLower(), kv.Value);
                s3TableDocsList.AddRange(s3TableDocs);
            }
            await _redshiftDao.PerformCsvMergeAsync(s3TableDocsList);
        }

        /// <summary>
        /// Uploads one part of a multi-part load to S3 and caches its document
        /// paths under the tag; nothing is merged until CommitMultiplePartsAsync.
        /// </summary>
        public async Task StorePartAsCsvAsync(string multiPartTag, string tableName, int part, List<RowChange> rowChanges)
        {
            lock (_cacheSyncRoot)
            {
                // BUG FIX: original tested ContainsKey without negation, so the first
                // part never created the list (KeyNotFoundException below) and a
                // second part would have thrown ArgumentException from Add.
                if (!_cachedMultiPartDocumentPaths.ContainsKey(multiPartTag))
                    _cachedMultiPartDocumentPaths.Add(multiPartTag, new List<S3TableDocuments>());
            }

            tableName = tableName.ToLower();
            var s3TableDocs = await LoadToS3Async(tableName, rowChanges);

            lock (_cacheSyncRoot)
            {
                _cachedMultiPartDocumentPaths[multiPartTag].AddRange(s3TableDocs);
            }
        }

        /// <summary>
        /// Merges all cached parts for the tag into Redshift, then forgets the tag.
        /// Throws InvalidOperationException when the tag is unknown.
        /// </summary>
        public async Task CommitMultiplePartsAsync(string multiPartTag)
        {
            List<S3TableDocuments> s3TableDocs;
            // Read under the same lock the writers use (original read without it).
            lock (_cacheSyncRoot)
            {
                if (!_cachedMultiPartDocumentPaths.TryGetValue(multiPartTag, out s3TableDocs))
                    throw new InvalidOperationException($"No multi-part tag exists that matches {multiPartTag}");
            }

            await _redshiftDao.PerformCsvMergeAsync(s3TableDocs);

            // Remove only after a successful merge, preserving the original retry semantics.
            lock (_cacheSyncRoot)
            {
                _cachedMultiPartDocumentPaths.Remove(multiPartTag);
            }
        }

        // Uploads the upsert and delete CSVs for one table and returns their S3 paths.
        private async Task<List<S3TableDocuments>> LoadToS3Async(string tableName, List<RowChange> changesToPut)
        {
            var tableUpdates = new List<S3TableDocuments>();

            using (AmazonS3Client s3Client = GetS3Client())
            {
                var orderedColumns = _redshiftDao.GetOrderedColumns(tableName);
                var upsertPath = await _s3Uploader.PutS3UpsertAsync(s3Client, tableName, changesToPut, orderedColumns);
                var deletePath = await _s3Uploader.PutS3DeleteAsync(s3Client, tableName, changesToPut, orderedColumns);

                tableUpdates.Add(new S3TableDocuments() { TableName = tableName, UpsertPath = upsertPath, DeletePath = deletePath });

                return tableUpdates;
            }
        }

        // Builds an S3 client from the configured credentials and region.
        private AmazonS3Client GetS3Client()
        {
            return new AmazonS3Client(_configuration.AccessKey,
                _configuration.SecretAccessKey,
                RegionEndpoint.GetBySystemName(_configuration.Region));
        }
    }
}
namespace CdcTools.CdcReader.Transactional
{
    /// <summary>
    /// Streams CDC changes grouped into whole transactions. StartAsync spins up
    /// per-table readers plus a grouping stage via the transaction coordinator;
    /// callers drain grouped batches with NextAsync and persist progress through
    /// the state manager.
    /// </summary>
    public class CdcTransactionClient
    {
        private readonly ICdcRepository _cdcRepository;
        private readonly ITableSchemaRepository _tableSchemaRepository;
        private readonly IFullLoadRepository _fullLoadRepository;
        private readonly IStateManager _stateManager;
        private readonly ITransactionCoordinator _transactionCoordinator;
        private CancellationTokenSource _transactionCts;

        // Bounded buffer of grouped transaction batches produced by the coordinator.
        private BlockingCollection<TransactionBatch> _transactionBatchBuffer;
        private readonly object _transactionLockObj = new object();

        /// <summary>
        /// Creates a client. Dependencies may be injected; when null, the default
        /// SQL Server implementations are constructed from the connection strings.
        /// </summary>
        public CdcTransactionClient(string connectionString,
            string stateManagementConnectionString,
            ICdcRepository cdcRepository = null,
            ITableSchemaRepository tableSchemaRepository = null,
            IFullLoadRepository fullLoadRepository = null,
            IStateManager stateManager = null,
            ITransactionCoordinator transactionCoordinator = null)
        {
            _cdcRepository = cdcRepository ?? new CdcRepository(connectionString);
            _tableSchemaRepository = tableSchemaRepository ?? new TableSchemaRepository(connectionString);
            _fullLoadRepository = fullLoadRepository ?? new FullLoadRepository(connectionString);
            _stateManager = stateManager ?? new StateManager(stateManagementConnectionString);
            _transactionCoordinator = transactionCoordinator ?? new TransactionCoordinator(_cdcRepository);
        }

        /// <summary>Starts streaming from the beginning (no last-retrieved LSN).</summary>
        public async Task StartAsync(List<string> tables,
            int perTableBufferLength,
            int transactionBufferLength,
            int transactionBatchSizeLimit)
        {
            await StartAsync(tables, perTableBufferLength, transactionBufferLength, transactionBatchSizeLimit, null);
        }

        /// <summary>
        /// Starts the per-table readers and the transaction-grouping stage.
        /// Throws InvalidOperationException when a stream is already active.
        /// </summary>
        public async Task StartAsync(List<string> tables,
            int perTableBufferLength,
            int transactionBufferLength,
            int transactionBatchSizeLimit,
            byte[] lastRetrievedLsn)
        {
            if (_transactionBatchBuffer != null)
                throw new InvalidOperationException("There is an active transaction stream that must be stopped first");

            var tableSchemas = new List<TableSchema>();
            foreach (var tableName in tables)
                tableSchemas.Add(await _tableSchemaRepository.GetTableSchemaAsync(tableName));

            lock (_transactionLockObj)
            {
                _transactionCts = new CancellationTokenSource();
                _transactionBatchBuffer = new BlockingCollection<TransactionBatch>(transactionBufferLength);
                var tableChangeBuffers = _transactionCoordinator.StartTableReaders(_transactionCts.Token, tableSchemas, perTableBufferLength, lastRetrievedLsn);
                _transactionCoordinator.StartGroupingTransactions(_transactionCts.Token, tableSchemas, tableChangeBuffers, _transactionBatchBuffer, transactionBatchSizeLimit);
            }
        }

        /// <summary>Waits indefinitely for the next grouped transaction batch.</summary>
        public async Task<TransactionBatch> NextAsync()
        {
            TransactionBatch batch = null;

            while (!_transactionBatchBuffer.TryTake(out batch))
                await Task.Delay(100);

            return batch;
        }

        /// <summary>Waits for the next batch until cancelled; returns null on cancellation.</summary>
        public async Task<TransactionBatch> NextAsync(CancellationToken token)
        {
            TransactionBatch batch = null;

            while (!_transactionBatchBuffer.TryTake(out batch) && !token.IsCancellationRequested)
                await Task.Delay(100);

            return batch;
        }

        /// <summary>
        /// Waits for the next batch until cancelled or the wait period elapses;
        /// returns null when it times out or is cancelled.
        /// </summary>
        public async Task<TransactionBatch> NextAsync(CancellationToken token, TimeSpan waitPeriod)
        {
            var sw = new Stopwatch();
            sw.Start();
            TransactionBatch batch = null;

            while (!_transactionBatchBuffer.TryTake(out batch) && sw.Elapsed <= waitPeriod && !token.IsCancellationRequested)
                await Task.Delay(100);

            return batch;
        }

        /// <summary>
        /// Cancels the readers and releases the batch buffer. Safe to call before
        /// StartAsync or more than once.
        /// </summary>
        public void Stop()
        {
            lock (_transactionLockObj)
            {
                // BUG FIX: original dereferenced _transactionCts unconditionally,
                // throwing NullReferenceException if Stop was called before StartAsync.
                _transactionCts?.Cancel();
                _transactionBatchBuffer = null;
            }
        }

        /// <summary>Restores the last persisted transaction id for this execution.</summary>
        public async Task<StateResult<TransactionId>> GetLastTransactionIdAsync(string executionId)
        {
            return await _stateManager.GetLastTransactionIdAsync(executionId);
        }

        /// <summary>Persists the transaction id for this execution.</summary>
        public async Task StoreTransactionIdAsync(string executionId, TransactionId transactionId)
        {
            await _stateManager.StoreTransactionIdAsync(executionId, transactionId);
        }
    }
}
"{0CF12805-C062-441B-8F46-A319085F69CF}" 21 | EndProject 22 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "CdcTools.CdcReader.Transactional", "src\CdcTools.CdcReader.Transactional\CdcTools.CdcReader.Transactional.csproj", "{B3C7C82D-5E40-41F4-846E-5C1C5874D557}" 23 | EndProject 24 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Database", "Database", "{F009633F-78D9-42CC-B6DE-8C801CD5F8F6}" 25 | EndProject 26 | Project("{00D1A9C2-B5F0-4AF3-8072-F6C62B433612}") = "CdcTools.CdcReader.Database", "src\CdcTools.CdcReader.Database\CdcTools.CdcReader.Database.sqlproj", "{5073F155-78C7-4576-8C56-33C6543DEFB7}" 27 | EndProject 28 | Global 29 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 30 | Debug|Any CPU = Debug|Any CPU 31 | Release|Any CPU = Release|Any CPU 32 | EndGlobalSection 33 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 34 | {711D5F45-D094-4D7D-BA4D-621094FAE31C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 35 | {711D5F45-D094-4D7D-BA4D-621094FAE31C}.Debug|Any CPU.Build.0 = Debug|Any CPU 36 | {711D5F45-D094-4D7D-BA4D-621094FAE31C}.Release|Any CPU.ActiveCfg = Release|Any CPU 37 | {711D5F45-D094-4D7D-BA4D-621094FAE31C}.Release|Any CPU.Build.0 = Release|Any CPU 38 | {979F5FD5-900A-4710-9AB6-D565EFA41EBB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 39 | {979F5FD5-900A-4710-9AB6-D565EFA41EBB}.Debug|Any CPU.Build.0 = Debug|Any CPU 40 | {979F5FD5-900A-4710-9AB6-D565EFA41EBB}.Release|Any CPU.ActiveCfg = Release|Any CPU 41 | {979F5FD5-900A-4710-9AB6-D565EFA41EBB}.Release|Any CPU.Build.0 = Release|Any CPU 42 | {454ADC46-0951-46A5-9AF6-B52E0F4047CB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 43 | {454ADC46-0951-46A5-9AF6-B52E0F4047CB}.Debug|Any CPU.Build.0 = Debug|Any CPU 44 | {454ADC46-0951-46A5-9AF6-B52E0F4047CB}.Release|Any CPU.ActiveCfg = Release|Any CPU 45 | {454ADC46-0951-46A5-9AF6-B52E0F4047CB}.Release|Any CPU.Build.0 = Release|Any CPU 46 | {71C46BB8-5078-4369-83BF-7B70BAE564EA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 47 | 
{71C46BB8-5078-4369-83BF-7B70BAE564EA}.Debug|Any CPU.Build.0 = Debug|Any CPU 48 | {71C46BB8-5078-4369-83BF-7B70BAE564EA}.Release|Any CPU.ActiveCfg = Release|Any CPU 49 | {71C46BB8-5078-4369-83BF-7B70BAE564EA}.Release|Any CPU.Build.0 = Release|Any CPU 50 | {EC28C6BC-FD6A-4CC3-A98D-3B1633AC2D8C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 51 | {EC28C6BC-FD6A-4CC3-A98D-3B1633AC2D8C}.Debug|Any CPU.Build.0 = Debug|Any CPU 52 | {EC28C6BC-FD6A-4CC3-A98D-3B1633AC2D8C}.Release|Any CPU.ActiveCfg = Release|Any CPU 53 | {EC28C6BC-FD6A-4CC3-A98D-3B1633AC2D8C}.Release|Any CPU.Build.0 = Release|Any CPU 54 | {0A5DCA98-B7D3-471A-AEF8-F51CF1E04E71}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 55 | {0A5DCA98-B7D3-471A-AEF8-F51CF1E04E71}.Debug|Any CPU.Build.0 = Debug|Any CPU 56 | {0A5DCA98-B7D3-471A-AEF8-F51CF1E04E71}.Release|Any CPU.ActiveCfg = Release|Any CPU 57 | {0A5DCA98-B7D3-471A-AEF8-F51CF1E04E71}.Release|Any CPU.Build.0 = Release|Any CPU 58 | {B3C7C82D-5E40-41F4-846E-5C1C5874D557}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 59 | {B3C7C82D-5E40-41F4-846E-5C1C5874D557}.Debug|Any CPU.Build.0 = Debug|Any CPU 60 | {B3C7C82D-5E40-41F4-846E-5C1C5874D557}.Release|Any CPU.ActiveCfg = Release|Any CPU 61 | {B3C7C82D-5E40-41F4-846E-5C1C5874D557}.Release|Any CPU.Build.0 = Release|Any CPU 62 | {5073F155-78C7-4576-8C56-33C6543DEFB7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 63 | {5073F155-78C7-4576-8C56-33C6543DEFB7}.Debug|Any CPU.Build.0 = Debug|Any CPU 64 | {5073F155-78C7-4576-8C56-33C6543DEFB7}.Debug|Any CPU.Deploy.0 = Debug|Any CPU 65 | {5073F155-78C7-4576-8C56-33C6543DEFB7}.Release|Any CPU.ActiveCfg = Release|Any CPU 66 | {5073F155-78C7-4576-8C56-33C6543DEFB7}.Release|Any CPU.Build.0 = Release|Any CPU 67 | {5073F155-78C7-4576-8C56-33C6543DEFB7}.Release|Any CPU.Deploy.0 = Release|Any CPU 68 | EndGlobalSection 69 | GlobalSection(SolutionProperties) = preSolution 70 | HideSolutionNode = FALSE 71 | EndGlobalSection 72 | GlobalSection(NestedProjects) = preSolution 73 | 
{711D5F45-D094-4D7D-BA4D-621094FAE31C} = {40662B41-C13C-4E3B-9316-E6F943F25104} 74 | {979F5FD5-900A-4710-9AB6-D565EFA41EBB} = {40662B41-C13C-4E3B-9316-E6F943F25104} 75 | {454ADC46-0951-46A5-9AF6-B52E0F4047CB} = {0CF12805-C062-441B-8F46-A319085F69CF} 76 | {71C46BB8-5078-4369-83BF-7B70BAE564EA} = {40662B41-C13C-4E3B-9316-E6F943F25104} 77 | {EC28C6BC-FD6A-4CC3-A98D-3B1633AC2D8C} = {0CF12805-C062-441B-8F46-A319085F69CF} 78 | {0A5DCA98-B7D3-471A-AEF8-F51CF1E04E71} = {0CF12805-C062-441B-8F46-A319085F69CF} 79 | {B3C7C82D-5E40-41F4-846E-5C1C5874D557} = {40662B41-C13C-4E3B-9316-E6F943F25104} 80 | {5073F155-78C7-4576-8C56-33C6543DEFB7} = {F009633F-78D9-42CC-B6DE-8C801CD5F8F6} 81 | EndGlobalSection 82 | GlobalSection(ExtensibilityGlobals) = postSolution 83 | SolutionGuid = {ABE5C26C-2EA9-4826-87FB-02D217DD8B46} 84 | EndGlobalSection 85 | EndGlobal 86 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 
3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.suo 8 | *.user 9 | *.userosscache 10 | *.sln.docstates 11 | 12 | # User-specific files (MonoDevelop/Xamarin Studio) 13 | *.userprefs 14 | 15 | # Build results 16 | [Dd]ebug/ 17 | [Dd]ebugPublic/ 18 | [Rr]elease/ 19 | [Rr]eleases/ 20 | x64/ 21 | x86/ 22 | bld/ 23 | [Bb]in/ 24 | [Oo]bj/ 25 | [Ll]og/ 26 | 27 | # Visual Studio 2015 cache/options directory 28 | .vs/ 29 | # Uncomment if you have tasks that create the project's static files in wwwroot 30 | #wwwroot/ 31 | 32 | # MSTest test Results 33 | [Tt]est[Rr]esult*/ 34 | [Bb]uild[Ll]og.* 35 | 36 | # NUNIT 37 | *.VisualState.xml 38 | TestResult.xml 39 | 40 | # Build Results of an ATL Project 41 | [Dd]ebugPS/ 42 | [Rr]eleasePS/ 43 | dlldata.c 44 | 45 | # .NET Core 46 | project.lock.json 47 | project.fragment.lock.json 48 | artifacts/ 49 | **/Properties/launchSettings.json 50 | 51 | *_i.c 52 | *_p.c 53 | *_i.h 54 | *.ilk 55 | *.meta 56 | *.obj 57 | *.pch 58 | *.pdb 59 | *.pgc 60 | *.pgd 61 | *.rsp 62 | *.sbr 63 | *.tlb 64 | *.tli 65 | *.tlh 66 | *.tmp 67 | *.tmp_proj 68 | *.log 69 | *.vspscc 70 | *.vssscc 71 | .builds 72 | *.pidb 73 | *.svclog 74 | *.scc 75 | 76 | # Chutzpah Test files 77 | _Chutzpah* 78 | 79 | # Visual C++ cache files 80 | ipch/ 81 | *.aps 82 | *.ncb 83 | *.opendb 84 | *.opensdf 85 | *.sdf 86 | *.cachefile 87 | *.VC.db 88 | *.VC.VC.opendb 89 | 90 | # Visual Studio profiler 91 | *.psess 92 | *.vsp 93 | *.vspx 94 | *.sap 95 | 96 | # TFS 2012 Local Workspace 97 | $tf/ 98 | 99 | # Guidance Automation Toolkit 100 | *.gpState 101 | 102 | # ReSharper is a .NET coding add-in 103 | _ReSharper*/ 104 | *.[Rr]e[Ss]harper 105 | *.DotSettings.user 106 | 107 | # JustCode is a .NET coding add-in 108 | .JustCode 109 | 110 | # TeamCity is a build add-in 111 | _TeamCity* 112 | 113 | # DotCover is a Code Coverage Tool 114 | *.dotCover 115 | 116 | # Visual Studio code coverage results 117 
| *.coverage 118 | *.coveragexml 119 | 120 | # NCrunch 121 | _NCrunch_* 122 | .*crunch*.local.xml 123 | nCrunchTemp_* 124 | 125 | # MightyMoose 126 | *.mm.* 127 | AutoTest.Net/ 128 | 129 | # Web workbench (sass) 130 | .sass-cache/ 131 | 132 | # Installshield output folder 133 | [Ee]xpress/ 134 | 135 | # DocProject is a documentation generator add-in 136 | DocProject/buildhelp/ 137 | DocProject/Help/*.HxT 138 | DocProject/Help/*.HxC 139 | DocProject/Help/*.hhc 140 | DocProject/Help/*.hhk 141 | DocProject/Help/*.hhp 142 | DocProject/Help/Html2 143 | DocProject/Help/html 144 | 145 | # Click-Once directory 146 | publish/ 147 | 148 | # Publish Web Output 149 | *.[Pp]ublish.xml 150 | *.azurePubxml 151 | # TODO: Comment the next line if you want to checkin your web deploy settings 152 | # but database connection strings (with potential passwords) will be unencrypted 153 | *.pubxml 154 | *.publishproj 155 | 156 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 157 | # checkin your Azure Web App publish settings, but sensitive information contained 158 | # in these scripts will be unencrypted 159 | PublishScripts/ 160 | 161 | # NuGet Packages 162 | *.nupkg 163 | # The packages folder can be ignored because of Package Restore 164 | **/packages/* 165 | # except build/, which is used as an MSBuild target. 
166 | !**/packages/build/ 167 | # Uncomment if necessary however generally it will be regenerated when needed 168 | #!**/packages/repositories.config 169 | # NuGet v3's project.json files produces more ignorable files 170 | *.nuget.props 171 | *.nuget.targets 172 | 173 | # Microsoft Azure Build Output 174 | csx/ 175 | *.build.csdef 176 | 177 | # Microsoft Azure Emulator 178 | ecf/ 179 | rcf/ 180 | 181 | # Windows Store app package directories and files 182 | AppPackages/ 183 | BundleArtifacts/ 184 | Package.StoreAssociation.xml 185 | _pkginfo.txt 186 | 187 | # Visual Studio cache files 188 | # files ending in .cache can be ignored 189 | *.[Cc]ache 190 | # but keep track of directories ending in .cache 191 | !*.[Cc]ache/ 192 | 193 | # Others 194 | ClientBin/ 195 | ~$* 196 | *~ 197 | *.dbmdl 198 | *.dbproj.schemaview 199 | *.jfm 200 | *.pfx 201 | *.publishsettings 202 | orleans.codegen.cs 203 | 204 | # Since there are multiple workflows, uncomment next line to ignore bower_components 205 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 206 | #bower_components/ 207 | 208 | # RIA/Silverlight projects 209 | Generated_Code/ 210 | 211 | # Backup & report files from converting an old project file 212 | # to a newer Visual Studio version. 
Backup files are not needed, 213 | # because we have git ;-) 214 | _UpgradeReport_Files/ 215 | Backup*/ 216 | UpgradeLog*.XML 217 | UpgradeLog*.htm 218 | 219 | # SQL Server files 220 | *.mdf 221 | *.ldf 222 | *.ndf 223 | 224 | # Business Intelligence projects 225 | *.rdl.data 226 | *.bim.layout 227 | *.bim_*.settings 228 | 229 | # Microsoft Fakes 230 | FakesAssemblies/ 231 | 232 | # GhostDoc plugin setting file 233 | *.GhostDoc.xml 234 | 235 | # Node.js Tools for Visual Studio 236 | .ntvs_analysis.dat 237 | node_modules/ 238 | 239 | # Typescript v1 declaration files 240 | typings/ 241 | 242 | # Visual Studio 6 build log 243 | *.plg 244 | 245 | # Visual Studio 6 workspace options file 246 | *.opt 247 | 248 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 249 | *.vbw 250 | 251 | # Visual Studio LightSwitch build output 252 | **/*.HTMLClient/GeneratedArtifacts 253 | **/*.DesktopClient/GeneratedArtifacts 254 | **/*.DesktopClient/ModelManifest.xml 255 | **/*.Server/GeneratedArtifacts 256 | **/*.Server/ModelManifest.xml 257 | _Pvt_Extensions 258 | 259 | # Paket dependency manager 260 | .paket/paket.exe 261 | paket-files/ 262 | 263 | # FAKE - F# Make 264 | .fake/ 265 | 266 | # JetBrains Rider 267 | .idea/ 268 | *.sln.iml 269 | 270 | # CodeRush 271 | .cr/ 272 | 273 | # Python Tools for Visual Studio (PTVS) 274 | __pycache__/ 275 | *.pyc 276 | 277 | # Cake - Uncomment if you are using it 278 | # tools/** 279 | # !tools/packages.config 280 | 281 | # Telerik's JustMock configuration file 282 | *.jmconfig 283 | 284 | # BizTalk build output 285 | *.btp.cs 286 | *.btm.cs 287 | *.odx.cs 288 | *.xsd.cs 289 | -------------------------------------------------------------------------------- /src/CdcTools.KafkaToRedshift/Consumers/KeyedJsonConsumer.cs: -------------------------------------------------------------------------------- 1 | using CdcTools.KafkaToRedshift.Redshift; 2 | using CdcTools.Redshift.Changes; 3 | using Confluent.Kafka; 4 | 
using Confluent.Kafka.Serialization; 5 | using Newtonsoft.Json; 6 | using System; 7 | using System.Collections.Concurrent; 8 | using System.Collections.Generic; 9 | using System.Linq; 10 | using System.Text; 11 | using System.Threading; 12 | using System.Threading.Tasks; 13 | 14 | namespace CdcTools.KafkaToRedshift.Consumers 15 | { 16 | public class KeyedJsonConsumer : IConsumer 17 | { 18 | private IRedshiftWriter _redshiftWriter; 19 | private List _consumerTasks; 20 | private List _redshiftTasks; 21 | private string _kafkaBootstrapServers; 22 | 23 | public KeyedJsonConsumer(IRedshiftWriter redshiftClient, string kafkaBootstrapServers) 24 | { 25 | _redshiftWriter = redshiftClient; 26 | _consumerTasks = new List(); 27 | _redshiftTasks = new List(); 28 | _kafkaBootstrapServers = kafkaBootstrapServers; 29 | } 30 | 31 | public async Task StartConsumingAsync(CancellationToken token, TimeSpan windowSizePeriod, int windowSizeItems, List kafkaSources) 32 | { 33 | var columnsLoaded = await CacheRedshiftColumns(kafkaSources.Select(x => x.Table).ToList()); 34 | if (!columnsLoaded) 35 | return columnsLoaded; 36 | 37 | foreach (var kafkaSource in kafkaSources) 38 | { 39 | var accumulatedChanges = new BlockingCollection>(); 40 | _consumerTasks.Add(Task.Run(() => 41 | { 42 | try 43 | { 44 | Consume(token, accumulatedChanges, kafkaSource.Topic, kafkaSource.Table); 45 | } 46 | catch(Exception ex) 47 | { 48 | Console.WriteLine($"Consumer failure. Table: {kafkaSource.Table}. Error: {ex}"); 49 | } 50 | })); 51 | 52 | _redshiftTasks.Add(Task.Run(async () => 53 | { 54 | try 55 | { 56 | await _redshiftWriter.StartWritingAsync(token, windowSizePeriod, windowSizeItems, kafkaSource.Table, accumulatedChanges); 57 | } 58 | catch (Exception ex) 59 | { 60 | Console.WriteLine($"Redshift Writer failure. Table: {kafkaSource.Table}. 
Error: {ex}"); 61 | } 62 | })); 63 | } 64 | 65 | return columnsLoaded; 66 | } 67 | 68 | public void WaitForCompletion() 69 | { 70 | Task.WaitAll(_consumerTasks.ToArray()); 71 | Task.WaitAll(_redshiftTasks.ToArray()); 72 | } 73 | 74 | private async Task CacheRedshiftColumns(List tables) 75 | { 76 | try 77 | { 78 | await _redshiftWriter.CacheTableColumnsAsync(tables); 79 | return true; 80 | } 81 | catch (Exception ex) 82 | { 83 | Console.WriteLine($"Failed getting Redshift column meta data. {ex}"); 84 | return false; 85 | } 86 | } 87 | 88 | private void Consume(CancellationToken token, BlockingCollection> accumulatedChanges, string topic, string table) 89 | { 90 | var conf = new Dictionary 91 | { 92 | { "group.id", $"{table}-consumer-group" }, 93 | { "statistics.interval.ms", 60000 }, 94 | { "bootstrap.servers", _kafkaBootstrapServers } 95 | }; 96 | 97 | foreach (var confPair in conf) 98 | Console.WriteLine(topic + " - " + confPair.Key + ": " + confPair.Value); 99 | 100 | using (var consumer = new Consumer(conf, new StringDeserializer(Encoding.UTF8), new StringDeserializer(Encoding.UTF8))) 101 | { 102 | consumer.OnError += (_, msg) 103 | => Console.WriteLine($"{topic} - Error: {msg.Reason}"); 104 | 105 | consumer.OnConsumeError += (_, msg) 106 | => Console.WriteLine($"{topic} - Consume error: {msg.Error.Reason}"); 107 | 108 | consumer.OnPartitionsAssigned += (_, partitions) => 109 | { 110 | Console.WriteLine($"{topic} - Assigned partitions: [{string.Join(", ", partitions)}], member id: {consumer.MemberId}"); 111 | consumer.Assign(partitions); 112 | }; 113 | 114 | consumer.OnPartitionsRevoked += (_, partitions) => 115 | { 116 | Console.WriteLine($"{topic} - Revoked partitions: [{string.Join(", ", partitions)}]"); 117 | consumer.Unassign(); 118 | }; 119 | 120 | Console.WriteLine($"Subscribing to topic {topic}"); 121 | consumer.Subscribe(topic); 122 | int secondsWithoutMessage = 0; 123 | 124 | while (!token.IsCancellationRequested) 125 | { 126 | Message msg = null; 127 
| if (consumer.Consume(out msg, TimeSpan.FromSeconds(1))) 128 | { 129 | AddToBuffer(consumer, msg, accumulatedChanges); 130 | secondsWithoutMessage = 0; 131 | } 132 | else 133 | { 134 | secondsWithoutMessage++; 135 | if (secondsWithoutMessage % 30 == 0) 136 | Console.WriteLine($"{topic}: No messages in last {secondsWithoutMessage} seconds"); 137 | 138 | Task.Delay(100).Wait(); 139 | } 140 | } 141 | } 142 | 143 | accumulatedChanges.CompleteAdding(); // notifies consumers that no more messages will come 144 | } 145 | 146 | private void AddToBuffer(Consumer consumer, Message jsonMessage, BlockingCollection> accumulatedChanges) 147 | { 148 | var msg = new MessageProxy(consumer, jsonMessage) 149 | { 150 | Payload = JsonConvert.DeserializeObject(jsonMessage.Value) 151 | }; 152 | accumulatedChanges.Add(msg); 153 | } 154 | } 155 | } 156 | -------------------------------------------------------------------------------- /src/CdcTools.KafkaToRedshift/Consumers/NonKeyedJsonConsumer.cs: -------------------------------------------------------------------------------- 1 | using CdcTools.KafkaToRedshift.Redshift; 2 | using CdcTools.Redshift; 3 | using CdcTools.Redshift.Changes; 4 | using CdcTools.Redshift.S3; 5 | using Confluent.Kafka; 6 | using Confluent.Kafka.Serialization; 7 | using Newtonsoft.Json; 8 | using System; 9 | using System.Collections.Concurrent; 10 | using System.Collections.Generic; 11 | using System.Linq; 12 | using System.Text; 13 | using System.Threading; 14 | using System.Threading.Tasks; 15 | 16 | namespace CdcTools.KafkaToRedshift.Consumers 17 | { 18 | public class NonKeyedJsonConsumer : IConsumer 19 | { 20 | private IRedshiftWriter _redshiftWriter; 21 | private List _consumerTasks; 22 | private List _redshiftTasks; 23 | private string _kafkaBootstrapServers; 24 | 25 | public NonKeyedJsonConsumer(IRedshiftWriter redshiftClient, string kafkaBootstrapServers) 26 | { 27 | _redshiftWriter = redshiftClient; 28 | _consumerTasks = new List(); 29 | _redshiftTasks = 
new List(); 30 | _kafkaBootstrapServers = kafkaBootstrapServers; 31 | } 32 | 33 | public async Task StartConsumingAsync(CancellationToken token, TimeSpan windowSizePeriod, int windowSizeItems, List kafkaSources) 34 | { 35 | var columnsLoaded = await CacheRedshiftColumns(kafkaSources.Select(x => x.Table).ToList()); 36 | if (!columnsLoaded) 37 | return columnsLoaded; 38 | 39 | foreach (var kafkaSource in kafkaSources) 40 | { 41 | var accumulatedChanges = new BlockingCollection>(); 42 | _consumerTasks.Add(Task.Run(() => 43 | { 44 | try 45 | { 46 | Consume(token, accumulatedChanges, kafkaSource.Topic, kafkaSource.Table); 47 | } 48 | catch (Exception ex) 49 | { 50 | Console.WriteLine($"Consumer failure. Table: {kafkaSource.Table}. Error: {ex}"); 51 | } 52 | })); 53 | 54 | _redshiftTasks.Add(Task.Run(async () => 55 | { 56 | try 57 | { 58 | await _redshiftWriter.StartWritingAsync(token, windowSizePeriod, windowSizeItems, kafkaSource.Table, accumulatedChanges); 59 | } 60 | catch (Exception ex) 61 | { 62 | Console.WriteLine($"Redshift Writer failure. Table: {kafkaSource.Table}. Error: {ex}"); 63 | } 64 | })); 65 | } 66 | 67 | return columnsLoaded; 68 | } 69 | 70 | public void WaitForCompletion() 71 | { 72 | Task.WaitAll(_consumerTasks.ToArray()); 73 | Task.WaitAll(_redshiftTasks.ToArray()); 74 | } 75 | 76 | private async Task CacheRedshiftColumns(List tables) 77 | { 78 | try 79 | { 80 | await _redshiftWriter.CacheTableColumnsAsync(tables); 81 | return true; 82 | } 83 | catch (Exception ex) 84 | { 85 | Console.WriteLine($"Failed getting Redshift column meta data. 
{ex}"); 86 | return false; 87 | } 88 | } 89 | 90 | private void Consume(CancellationToken token, BlockingCollection> accumulatedChanges, string topic, string table) 91 | { 92 | var conf = new Dictionary 93 | { 94 | { "group.id", $"{table}-consumer-group" }, 95 | { "statistics.interval.ms", 60000 }, 96 | { "bootstrap.servers", _kafkaBootstrapServers } 97 | }; 98 | 99 | foreach (var confPair in conf) 100 | Console.WriteLine(topic + " - " + confPair.Key + ": " + confPair.Value); 101 | 102 | using (var consumer = new Consumer(conf, null, new StringDeserializer(Encoding.UTF8))) 103 | { 104 | consumer.OnError += (_, msg) 105 | => Console.WriteLine($"{topic} - Error: {msg.Reason}"); 106 | 107 | consumer.OnConsumeError += (_, msg) 108 | => Console.WriteLine($"{topic} - Consume error: {msg.Error.Reason}"); 109 | 110 | consumer.OnPartitionsAssigned += (_, partitions) => 111 | { 112 | Console.WriteLine($"{topic} - Assigned partitions: [{string.Join(", ", partitions)}], member id: {consumer.MemberId}"); 113 | consumer.Assign(partitions); 114 | }; 115 | 116 | consumer.OnPartitionsRevoked += (_, partitions) => 117 | { 118 | Console.WriteLine($"{topic} - Revoked partitions: [{string.Join(", ", partitions)}]"); 119 | consumer.Unassign(); 120 | }; 121 | 122 | Console.WriteLine($"Subscribing to topic {topic}"); 123 | consumer.Subscribe(topic); 124 | int secondsWithoutMessage = 0; 125 | 126 | while (!token.IsCancellationRequested) 127 | { 128 | Message msg = null; 129 | if (consumer.Consume(out msg, TimeSpan.FromSeconds(1))) 130 | { 131 | AddToBuffer(consumer, msg, accumulatedChanges); 132 | secondsWithoutMessage = 0; 133 | } 134 | else 135 | { 136 | secondsWithoutMessage++; 137 | if (secondsWithoutMessage % 30 == 0) 138 | Console.WriteLine($"{topic}: No messages in last {secondsWithoutMessage} seconds"); 139 | 140 | Task.Delay(100).Wait(); 141 | } 142 | } 143 | } 144 | 145 | accumulatedChanges.CompleteAdding(); // notifies consumers that no more messages will come 146 | } 147 | 148 
| private void AddToBuffer(Consumer consumer, Message jsonMessage, BlockingCollection> accumulatedChanges) 149 | { 150 | var msg = new MessageProxy(consumer, jsonMessage) 151 | { 152 | Payload = JsonConvert.DeserializeObject(jsonMessage.Value) 153 | }; 154 | accumulatedChanges.Add(msg); 155 | } 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /src/CdcTools.CdcReader/Tables/TableSchemaRepository.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Data; 4 | using System.Data.SqlClient; 5 | using System.Linq; 6 | using System.Text; 7 | using System.Threading.Tasks; 8 | 9 | namespace CdcTools.CdcReader.Tables 10 | { 11 | public class TableSchemaRepository : ITableSchemaRepository 12 | { 13 | private string _connString; 14 | 15 | public TableSchemaRepository(string connectionString) 16 | { 17 | _connString = connectionString; 18 | } 19 | 20 | public async Task GetTableSchemaAsync(string tableName) 21 | { 22 | var columns = await GetTableColumnsAsync(tableName); 23 | var primaryKeys = await GetTablePrimaryKeysAsync(tableName); 24 | 25 | var table = columns.GroupBy(x => new { x.Schema, x.TableName }).First(); 26 | 27 | var tableSchema = new TableSchema(); 28 | tableSchema.Schema = table.Key.Schema; 29 | tableSchema.TableName = table.Key.TableName; 30 | tableSchema.Columns = table.Select(x => new ColumnSchema() 31 | { 32 | DataType = x.DataType, 33 | MaxCharsLength = x.MaxCharsLength, 34 | Name = x.ColumnName, 35 | OrdinalPosition = x.OrdinalPosition, 36 | IsNullable = x.IsNullable 37 | }) 38 | .ToList(); 39 | 40 | tableSchema.PrimaryKeys = primaryKeys.Where(x => x.TableName.Equals(table.Key.TableName)) 41 | .Select(x => new PrimaryKeyColumn() { ColumnName = x.ColumnName, OrdinalPosition = x.OrdinalPosition }) 42 | .ToList(); 43 | 44 | return tableSchema; 45 | } 46 | 47 | private async Task> 
GetTableColumnsAsync(string tableName) 48 | { 49 | var columns = new List(); 50 | 51 | using (var conn = await GetOpenConnectionAsync()) 52 | { 53 | using (var command = conn.CreateCommand()) 54 | { 55 | command.CommandText = TableSchemaQueryBuilder.GetColumnsOfTableQuery(); 56 | command.CommandTimeout = 30; 57 | command.Parameters.Add("TableName", SqlDbType.VarChar).Value = tableName; 58 | 59 | using (var reader = await command.ExecuteReaderAsync()) 60 | { 61 | while (await reader.ReadAsync()) 62 | { 63 | var column = GetTableColumn(reader); 64 | if(column.ColumnName.IndexOf("msrepl", StringComparison.OrdinalIgnoreCase) == -1) 65 | columns.Add(column); 66 | } 67 | } 68 | } 69 | } 70 | 71 | return columns; 72 | } 73 | 74 | private TableColumn GetTableColumn(SqlDataReader reader) 75 | { 76 | var column = new TableColumn(); 77 | column.Schema = reader["TABLE_SCHEMA"].ToString(); 78 | column.TableName = reader["TABLE_NAME"].ToString(); 79 | column.ColumnName = reader["COLUMN_NAME"].ToString(); 80 | column.OrdinalPosition = (int)reader["ORDINAL_POSITION"]; 81 | column.DataType = reader["DATA_TYPE"].ToString(); 82 | column.IsNullable = reader["IS_NULLABLE"].ToString().ToLower().Equals("yes") ? 
true : false; 83 | 84 | if (reader["CHARACTER_MAXIMUM_LENGTH"] != DBNull.Value) 85 | column.MaxCharsLength = (int)reader["CHARACTER_MAXIMUM_LENGTH"]; 86 | 87 | if (reader["NUMERIC_SCALE"] != DBNull.Value) 88 | column.NumericScale = (int)reader["NUMERIC_SCALE"]; 89 | 90 | if (reader["NUMERIC_PRECISION"] != DBNull.Value) 91 | column.NumericPrecision = (int)(byte)reader["NUMERIC_PRECISION"]; 92 | 93 | return column; 94 | } 95 | 96 | private async Task> GetTablePrimaryKeysAsync() 97 | { 98 | var primaryKeys = new List(); 99 | 100 | using (var conn = await GetOpenConnectionAsync()) 101 | { 102 | using (var command = conn.CreateCommand()) 103 | { 104 | command.CommandText = TableSchemaQueryBuilder.GetPrimaryKeysQuery; 105 | command.CommandTimeout = 30; 106 | 107 | using (var reader = await command.ExecuteReaderAsync()) 108 | { 109 | while (await reader.ReadAsync()) 110 | { 111 | var pk = GetPrimaryKey(reader); 112 | primaryKeys.Add(pk); 113 | } 114 | } 115 | } 116 | } 117 | 118 | return primaryKeys; 119 | } 120 | 121 | private async Task> GetTablePrimaryKeysAsync(string tableName) 122 | { 123 | var primaryKeys = new List(); 124 | 125 | using (var conn = await GetOpenConnectionAsync()) 126 | { 127 | using (var command = conn.CreateCommand()) 128 | { 129 | command.CommandText = TableSchemaQueryBuilder.GetPrimaryKeyColumnsOfTableQuery(); 130 | command.CommandTimeout = 30; 131 | command.Parameters.Add("TableName", SqlDbType.VarChar).Value = tableName; 132 | 133 | using (var reader = await command.ExecuteReaderAsync()) 134 | { 135 | while (await reader.ReadAsync()) 136 | { 137 | var pk = GetPrimaryKey(reader); 138 | primaryKeys.Add(pk); 139 | } 140 | } 141 | } 142 | } 143 | 144 | return primaryKeys; 145 | } 146 | 147 | private TablePrimaryKey GetPrimaryKey(SqlDataReader reader) 148 | { 149 | var pk = new TablePrimaryKey(); 150 | pk.TableName = reader["TableName"].ToString(); 151 | pk.ColumnName = reader["ColumnName"].ToString(); 152 | pk.OrdinalPosition = 
(int)(byte)reader["OrdinalPosition"]; 153 | 154 | return pk; 155 | } 156 | 157 | private async Task GetOpenConnectionAsync() 158 | { 159 | var conn = new SqlConnection(_connString); 160 | await conn.OpenAsync(); 161 | 162 | return conn; 163 | } 164 | } 165 | } 166 | -------------------------------------------------------------------------------- /src/CdcTools.KafkaToRedshift/Program.cs: -------------------------------------------------------------------------------- 1 | using CdcTools.KafkaToRedshift.Consumers; 2 | using CdcTools.KafkaToRedshift.Redshift; 3 | using CdcTools.Redshift; 4 | using Microsoft.Extensions.Configuration; 5 | using System; 6 | using System.Collections.Generic; 7 | using System.IO; 8 | using System.Linq; 9 | using System.Runtime.Loader; 10 | using System.Threading; 11 | using System.Threading.Tasks; 12 | 13 | namespace CdcTools.KafkaToRedshift 14 | { 15 | class Program 16 | { 17 | static void Main(string[] args) 18 | { 19 | Console.Title = "Kafka to Redshift Writer"; 20 | 21 | // support graceful shutdown in Docker 22 | var ended = new ManualResetEventSlim(); 23 | var starting = new ManualResetEventSlim(); 24 | 25 | AssemblyLoadContext.Default.Unloading += ctx => 26 | { 27 | System.Console.WriteLine("Unloading fired"); 28 | starting.Set(); 29 | System.Console.WriteLine("Waiting for completion"); 30 | ended.Wait(); 31 | }; 32 | 33 | // set up configuration 34 | var builder = new ConfigurationBuilder() 35 | .SetBasePath(Directory.GetCurrentDirectory()) 36 | .AddJsonFile("appsettings.json", optional: true, reloadOnChange: true) 37 | .AddCommandLine(args) 38 | .AddEnvironmentVariables("CDCTOOLS_"); // all environment variables with this prefix; 39 | 40 | IConfigurationRoot configuration = builder.Build(); 41 | 42 | // get parameters and start 43 | var tables = GetTables(configuration); 44 | var windowSizePeriod = GetWindowSizeTimePeriod(configuration); 45 | var windowSizeItems = GetWindowSizeItemCount(configuration); 46 | var serializationMode = 
GetSerializationMode(configuration); 47 | var messagesHaveKey = MessagesHaveKey(configuration); 48 | 49 | var kafkaSources = tables.Select(x => new KafkaSource() 50 | { 51 | Table = x, 52 | Topic = configuration["TableTopicPrefix"] + x.ToLower() 53 | }).ToList(); 54 | 55 | var cts = new CancellationTokenSource(); 56 | 57 | IConsumer consumer = GetConsumer(serializationMode, messagesHaveKey, configuration); 58 | bool startedOk = consumer.StartConsumingAsync(cts.Token, windowSizePeriod, windowSizeItems, kafkaSources).Result; 59 | if (startedOk) 60 | { 61 | Console.WriteLine($"Consuming messages of tables {string.Join(',', tables)} in {serializationMode.ToString()} deserialization mode with {windowSizePeriod} window sizes"); 62 | 63 | #if DEBUG 64 | Console.WriteLine("Press any key to shutdown"); 65 | Console.ReadKey(); 66 | #else 67 | starting.Wait(); 68 | Console.WriteLine("Received signal gracefully shutting down"); 69 | #endif 70 | } 71 | else 72 | { 73 | Console.WriteLine("Failed to start up correctly, shutting down"); 74 | } 75 | 76 | cts.Cancel(); 77 | consumer.WaitForCompletion(); 78 | ended.Set(); 79 | } 80 | 81 | private static List GetTables(IConfiguration configuration) 82 | { 83 | if (configuration["Tables"] != null) 84 | { 85 | return configuration["Tables"].Split(',').ToList(); 86 | } 87 | else 88 | { 89 | return new List(); 90 | } 91 | } 92 | 93 | private static TimeSpan GetWindowSizeTimePeriod(IConfiguration configuration) 94 | { 95 | return TimeSpan.FromMilliseconds(int.Parse(configuration["WindowMs"])); 96 | } 97 | 98 | private static int GetWindowSizeItemCount(IConfiguration configuration) 99 | { 100 | return int.Parse(configuration["WindowItems"]); 101 | } 102 | 103 | private static SerializationMode GetSerializationMode(IConfiguration configuration) 104 | { 105 | return (SerializationMode)Enum.Parse(typeof(SerializationMode), configuration["SerializationMode"]); 106 | } 107 | 108 | private static bool MessagesHaveKey(IConfiguration configuration) 
109 | { 110 | return bool.Parse(configuration["messagesHaveKey"]); 111 | } 112 | 113 | private static string GetBootstrapServers(IConfiguration configuration) 114 | { 115 | return configuration["KafkaBootstrapServers"]; 116 | } 117 | 118 | private static string GetSchemaRegistryUrl(IConfiguration configuration) 119 | { 120 | return configuration["KafkaSchemaRegistryUrl"]; 121 | } 122 | 123 | private static IRedshiftWriter GetRedshiftWriter(IConfiguration configuration) 124 | { 125 | return new RedshiftWriter(new RedshiftClient(new RedshiftConfiguration() 126 | { 127 | AccessKey = configuration["AccessKey"], 128 | SecretAccessKey = configuration["SecretAccessKey"], 129 | Region = configuration["AwsRegion"], 130 | Port = configuration["RedshiftPort"], 131 | Server = configuration["RedshiftServer"], 132 | MasterUsername = configuration["RedshiftUser"], 133 | MasterUserPassword = configuration["RedshiftPassword"], 134 | DBName = configuration["RedshiftDbName"], 135 | IamRole = configuration["RedshiftRole"], 136 | S3BucketName = configuration["S3BucketName"] 137 | })); 138 | } 139 | 140 | private static IConsumer GetConsumer(SerializationMode serializationMode, bool messagesHaveKey, IConfiguration configuration) 141 | { 142 | var redshiftWriter = GetRedshiftWriter(configuration); 143 | if (serializationMode == SerializationMode.Avro) 144 | { 145 | if (messagesHaveKey) 146 | return new KeyedAvroConsumer(redshiftWriter, GetBootstrapServers(configuration), GetSchemaRegistryUrl(configuration)); 147 | else 148 | return new NonKeyedAvroConsumer(redshiftWriter, GetBootstrapServers(configuration), GetSchemaRegistryUrl(configuration)); 149 | } 150 | else 151 | { 152 | if (messagesHaveKey) 153 | return new KeyedJsonConsumer(redshiftWriter, GetBootstrapServers(configuration)); 154 | else 155 | return new NonKeyedJsonConsumer(redshiftWriter, GetBootstrapServers(configuration)); 156 | } 157 | } 158 | } 159 | } 160 | 
-------------------------------------------------------------------------------- /src/CdcTools.CdcToKafka.Streaming/FullLoadStreamer.cs: -------------------------------------------------------------------------------- 1 | using CdcTools.CdcToKafka.Streaming.Producers; 2 | using CdcTools.CdcReader; 3 | using CdcTools.CdcReader.Changes; 4 | using CdcTools.CdcReader.Tables; 5 | using Microsoft.Extensions.Configuration; 6 | using System; 7 | using System.Collections.Generic; 8 | using System.Linq; 9 | using System.Text; 10 | using System.Threading; 11 | using System.Threading.Tasks; 12 | 13 | namespace CdcTools.CdcToKafka.Streaming 14 | { 15 | public class FullLoadStreamer 16 | { 17 | private List _loadTasks; 18 | private string _kafkaTopicPrefix; 19 | private string _kafkaBootstrapServers; 20 | private string _schemaRegistryUrl; 21 | private CdcReaderClient _cdcReaderClient; 22 | 23 | public FullLoadStreamer(IConfiguration configuration, CdcReaderClient cdcReaderClient) 24 | { 25 | _cdcReaderClient = cdcReaderClient; 26 | _kafkaTopicPrefix = configuration["TableTopicPrefix"]; 27 | _kafkaBootstrapServers = configuration["KafkaBootstrapServers"]; 28 | _schemaRegistryUrl = configuration["KafkaSchemaRegistryUrl"]; 29 | 30 | _loadTasks = new List(); 31 | } 32 | 33 | public async Task StreamTablesAsync(CancellationToken token, 34 | string executionId, 35 | List tables, 36 | SerializationMode serializationMode, 37 | bool sendWithKey, 38 | int batchSize, 39 | int printMod) 40 | { 41 | foreach (var tableName in tables) 42 | { 43 | var tableSchema = await _cdcReaderClient.GetTableSchemaAsync(tableName); 44 | _loadTasks.Add(Task.Run(async () => 45 | { 46 | try 47 | { 48 | await StreamTableAsync(token, 49 | executionId, 50 | tableSchema, 51 | serializationMode, 52 | sendWithKey, 53 | batchSize, 54 | printMod); 55 | } 56 | catch(Exception ex) 57 | { 58 | Console.WriteLine(ex); 59 | } 60 | })); 61 | } 62 | } 63 | 64 | public void WaitForCompletion() 65 | { 66 | 
Task.WaitAll(_loadTasks.ToArray()); 67 | } 68 | 69 | public bool HasFinished() 70 | { 71 | return _loadTasks.All(x => x.IsCompleted); 72 | } 73 | 74 | private async Task StreamTableAsync(CancellationToken token, 75 | string executionId, 76 | TableSchema tableSchema, 77 | SerializationMode serializationMode, 78 | bool sendWithKey, 79 | int batchSize, 80 | int printPercentProgressMod) 81 | { 82 | string topicName = _kafkaTopicPrefix + tableSchema.TableName.ToLower(); 83 | var rowCount = await _cdcReaderClient.GetRowCountAsync(tableSchema); 84 | Console.WriteLine($"Table {tableSchema.Schema}.{tableSchema.TableName} has {rowCount} rows to export"); 85 | int progress = 0; 86 | 87 | using (var producer = ProducerFactory.GetProducer(topicName, tableSchema, serializationMode, sendWithKey, _kafkaBootstrapServers, _schemaRegistryUrl)) 88 | { 89 | long ctr = 0; 90 | PrimaryKeyValue lastRetrievedKey = null; 91 | var existingOffset = await _cdcReaderClient.GetLastFullLoadOffsetAsync(executionId, tableSchema.TableName); 92 | if (existingOffset.Result == CdcReader.State.Result.NoStoredState) 93 | { 94 | Console.WriteLine($"Table {tableSchema.TableName} - No previous stored offset. 
Starting from first row"); 95 | var firstBatch = await _cdcReaderClient.GetFirstBatchAsync(tableSchema, batchSize); 96 | ctr = await PublishAsync(producer, token, firstBatch, ctr); 97 | lastRetrievedKey = firstBatch.LastRowKey; 98 | await _cdcReaderClient.StoreFullLoadOffsetAsync(executionId, tableSchema.TableName, firstBatch.LastRowKey); 99 | } 100 | else 101 | { 102 | Console.WriteLine($"Table {tableSchema.TableName} - No data to export"); 103 | lastRetrievedKey = existingOffset.State; 104 | } 105 | 106 | bool finished = false; 107 | 108 | while (!token.IsCancellationRequested && !finished) 109 | { 110 | var changes = new List(); 111 | 112 | var batch = await _cdcReaderClient.GetBatchAsync(tableSchema, lastRetrievedKey, batchSize); 113 | ctr = await PublishAsync(producer, token, batch, ctr); 114 | 115 | int latestProgress = (int)(((double)ctr / (double)rowCount)*100); 116 | if(progress != latestProgress && latestProgress % printPercentProgressMod == 0) 117 | Console.WriteLine($"Table {tableSchema.Schema}.{tableSchema.TableName} - Progress at {latestProgress}% ({ctr} records)"); 118 | 119 | progress = latestProgress; 120 | lastRetrievedKey = batch.LastRowKey; 121 | await _cdcReaderClient.StoreFullLoadOffsetAsync(executionId, tableSchema.TableName, lastRetrievedKey); 122 | 123 | if (!batch.Records.Any() || batch.Records.Count < batchSize) 124 | finished = true; 125 | } 126 | 127 | if (token.IsCancellationRequested) 128 | Console.WriteLine($"Table {tableSchema.Schema}.{tableSchema.TableName} - cancelled at progress at {progress}% ({ctr} records)"); 129 | else 130 | Console.WriteLine($"Table {tableSchema.Schema}.{tableSchema.TableName} - complete ({ctr} records)"); 131 | } 132 | } 133 | 134 | private async Task PublishAsync(IKafkaProducer producer, CancellationToken token, FullLoadBatch batch, long ctr) 135 | { 136 | foreach (var row in batch.Records) 137 | { 138 | var change = new ChangeRecord(); 139 | change.ChangeKey = row.ChangeKey; 140 | change.ChangeType = 
ChangeType.INSERT; 141 | change.LsnStr = ctr.ToString(); 142 | change.SeqValStr = ctr.ToString(); 143 | change.Data = row.Data; 144 | 145 | await producer.SendAsync(token, change); 146 | ctr++; 147 | } 148 | 149 | return ctr; 150 | } 151 | } 152 | } 153 | -------------------------------------------------------------------------------- /src/CdcTools.Redshift/S3/S3Uploader.cs: -------------------------------------------------------------------------------- 1 | using Amazon.S3; 2 | using Amazon.S3.Model; 3 | using CdcTools.Redshift.Changes; 4 | using System; 5 | using System.Collections.Generic; 6 | using System.Linq; 7 | using System.Text; 8 | using System.Threading.Tasks; 9 | 10 | namespace CdcTools.Redshift.S3 11 | { 12 | public class S3Uploader : IS3Uploader 13 | { 14 | private string _bucketName; 15 | 16 | public S3Uploader(string bucketName) 17 | { 18 | _bucketName = bucketName; 19 | } 20 | 21 | public async Task PutS3UpsertAsync(AmazonS3Client s3Client, string table, List changeRecords, List orderedCols) 22 | { 23 | var changesToPut = GetValidChanges(changeRecords, ChangeType.INSERT, ChangeType.UPDATE_AFTER); 24 | if (!changesToPut.Any()) 25 | return ""; 26 | 27 | var document = BuildDocument(changesToPut, orderedCols); 28 | var s3Path = await PerformRequestAsync(s3Client, table, "upsert", document, changesToPut); 29 | 30 | Console.WriteLine($"Uploaded upsert to {s3Path} with {changesToPut.Count()} changes. 
{changeRecords.Count(x => x.ChangeType == ChangeType.INSERT || x.ChangeType == ChangeType.UPDATE_AFTER) - changesToPut.Count} redundant changes were omitted."); 31 | 32 | return s3Path; 33 | } 34 | 35 | public async Task PutS3DeleteAsync(AmazonS3Client s3Client, string table, List changeRecords, List orderedCols) 36 | { 37 | // where the last change to happen for a given record was a delete 38 | var changesToPut = GetValidChanges(changeRecords, ChangeType.DELETE); 39 | if (!changesToPut.Any()) 40 | return ""; 41 | 42 | var document = BuildDocument(changesToPut, orderedCols); 43 | var s3Path = await PerformRequestAsync(s3Client, table, "delete", document, changesToPut); 44 | 45 | Console.WriteLine($"Uploaded delete to {s3Path} with {changesToPut.Count()} changes. {changeRecords.Count(x => x.ChangeType == ChangeType.DELETE) - changesToPut.Count} redundant changes were omitted."); 46 | 47 | return s3Path; 48 | } 49 | 50 | public async Task PutS3UpsertPartAsync(AmazonS3Client s3Client, string table, List changeRecords, List orderedCols, int part) 51 | { 52 | var changesToPut = GetValidChanges(changeRecords, ChangeType.INSERT, ChangeType.UPDATE_AFTER); 53 | if (!changesToPut.Any()) 54 | return ""; 55 | 56 | var document = BuildDocument(changesToPut, orderedCols); 57 | var s3Path = await PerformRequestAsync(s3Client, table, "upsert", document, changesToPut, "_Part" + part.ToString().PadLeft(5, '0')); 58 | 59 | Console.WriteLine($"Uploaded upsert to {s3Path} with {changesToPut.Count()} changes. 
{changeRecords.Count(x => x.ChangeType == ChangeType.INSERT || x.ChangeType == ChangeType.UPDATE_AFTER) - changesToPut.Count} redundant changes were omitted."); 60 | 61 | return s3Path; 62 | } 63 | 64 | public async Task PutS3DeletePartAsync(AmazonS3Client s3Client, string table, List changeRecords, List orderedCols, int part) 65 | { 66 | // where the last change to happen for a given record was a delete 67 | var changesToPut = GetValidChanges(changeRecords, ChangeType.DELETE); 68 | if (!changesToPut.Any()) 69 | return ""; 70 | 71 | var document = BuildDocument(changesToPut, orderedCols); 72 | var s3Path = await PerformRequestAsync(s3Client, table, "delete", document, changesToPut, "_Part" + part.ToString().PadLeft(5, '0')); 73 | 74 | Console.WriteLine($"Uploaded upsert to {s3Path} with {changesToPut.Count()} changes. {changeRecords.Count(x => x.ChangeType == ChangeType.DELETE) - changesToPut.Count} redundant changes were omitted."); 75 | 76 | return s3Path; 77 | } 78 | 79 | private string BuildDocument(List changesToPut, List orderedCols) 80 | { 81 | int count = changesToPut.Count; 82 | int ctr = 0; 83 | var sb = new StringBuilder(); 84 | foreach (var change in changesToPut) 85 | { 86 | ctr++; 87 | for (int i = 0; i < orderedCols.Count; i++) 88 | { 89 | object value = GetValue(orderedCols[i], change.Data); 90 | if (value is DateTime) 91 | { 92 | DateTime dt = (DateTime)value; 93 | sb.Append(dt.ToString("yyyy-MM-dd")); 94 | } 95 | else 96 | sb.Append(value.ToString()); 97 | 98 | if (i < orderedCols.Count - 1) 99 | sb.Append("|"); 100 | } 101 | 102 | if (ctr < count) 103 | sb.AppendLine(""); 104 | } 105 | 106 | return sb.ToString(); 107 | } 108 | 109 | private object GetValue(string column, Dictionary data) 110 | { 111 | foreach(var pair in data) 112 | { 113 | if (pair.Key.Equals(column, StringComparison.OrdinalIgnoreCase)) 114 | return pair.Value; 115 | } 116 | 117 | return ""; 118 | } 119 | 120 | private List GetValidChanges(List changeRecords, params ChangeType[] 
changeTypes) 121 | { 122 | var changesToPut = new List(); 123 | 124 | var groupedByRecordId = changeRecords.GroupBy(x => x.ChangeKey).ToList(); 125 | 126 | foreach (var changesOfRecord in groupedByRecordId) 127 | { 128 | var orderedChanges = changesOfRecord.OrderBy(x => x.LsnInteger).ThenBy(x => x.SeqValInteger).ToList(); 129 | var lastChange = orderedChanges.Last(); 130 | 131 | if (changeTypes.Contains(lastChange.ChangeType)) 132 | changesToPut.Add(lastChange); 133 | } 134 | 135 | return changesToPut; 136 | } 137 | 138 | private async Task PerformRequestAsync(AmazonS3Client s3Client, string table, string changeType, string document, List changesToPut, string suffix="") 139 | { 140 | var request = new PutObjectRequest() 141 | { 142 | BucketName = _bucketName, 143 | ContentBody = document, 144 | Key = $"{table}/{changeType}/{changesToPut.Min(x => x.Lsn).ToString()}{suffix}", 145 | ContentType = "text/plain" 146 | }; 147 | var response = await s3Client.PutObjectAsync(request); 148 | 149 | if (response.HttpStatusCode != System.Net.HttpStatusCode.OK) 150 | { 151 | // should check response and act in case of failure 152 | // this would need careful analysis of correct behaviour 153 | Console.WriteLine("Upload failure!"); 154 | return ""; 155 | } 156 | 157 | return $"s3://{request.BucketName}/{request.Key}"; 158 | } 159 | } 160 | } 161 | -------------------------------------------------------------------------------- /src/CdcTools.KafkaToRedshift/Consumers/KeyedAvroConsumer.cs: -------------------------------------------------------------------------------- 1 | using Avro.Generic; 2 | using CdcTools.KafkaToRedshift.Redshift; 3 | using CdcTools.KafkaToRedshift.Serialization; 4 | using CdcTools.Redshift; 5 | using CdcTools.Redshift.Changes; 6 | using CdcTools.Redshift.S3; 7 | using Confluent.Kafka; 8 | using Confluent.Kafka.Serialization; 9 | using Newtonsoft.Json; 10 | using System; 11 | using System.Collections.Concurrent; 12 | using System.Collections.Generic; 13 | 
using System.Linq; 14 | using System.Text; 15 | using System.Threading; 16 | using System.Threading.Tasks; 17 | 18 | namespace CdcTools.KafkaToRedshift.Consumers 19 | { 20 | public class KeyedAvroConsumer : IConsumer 21 | { 22 | private IRedshiftWriter _redshiftWriter; 23 | private List _consumerTasks; 24 | private List _redshiftTasks; 25 | private string _kafkaBootstrapServers; 26 | private string _schemaRegistryUrl; 27 | 28 | public KeyedAvroConsumer(IRedshiftWriter redshiftWriter, string kafkaBootstrapServers, string schemaRegistryUrl) 29 | { 30 | _redshiftWriter = redshiftWriter; 31 | _consumerTasks = new List(); 32 | _redshiftTasks = new List(); 33 | 34 | _kafkaBootstrapServers = kafkaBootstrapServers; 35 | _schemaRegistryUrl = schemaRegistryUrl; 36 | } 37 | 38 | public async Task StartConsumingAsync(CancellationToken token, TimeSpan windowSizePeriod, int windowSizeItems, List kafkaSources) 39 | { 40 | var columnsLoaded = await CacheRedshiftColumns(kafkaSources.Select(x => x.Table).ToList()); 41 | if (!columnsLoaded) 42 | return columnsLoaded; 43 | 44 | foreach (var kafkaSource in kafkaSources) 45 | { 46 | var accumulatedChanges = new BlockingCollection>(); 47 | _consumerTasks.Add(Task.Run(() => 48 | { 49 | try 50 | { 51 | Consume(token, accumulatedChanges, kafkaSource.Topic, kafkaSource.Table); 52 | } 53 | catch (Exception ex) 54 | { 55 | Console.WriteLine($"Consumer failure. Table: {kafkaSource.Table}. Error: {ex}"); 56 | } 57 | })); 58 | 59 | _redshiftTasks.Add(Task.Run(async () => 60 | { 61 | try 62 | { 63 | await _redshiftWriter.StartWritingAsync(token, windowSizePeriod, windowSizeItems, kafkaSource.Table, accumulatedChanges); 64 | } 65 | catch (Exception ex) 66 | { 67 | Console.WriteLine($"Redshift Writer failure. Table: {kafkaSource.Table}. 
Error: {ex}"); 68 | } 69 | })); 70 | } 71 | 72 | return columnsLoaded; 73 | } 74 | 75 | public void WaitForCompletion() 76 | { 77 | Task.WaitAll(_consumerTasks.ToArray()); 78 | Task.WaitAll(_redshiftTasks.ToArray()); 79 | } 80 | 81 | private async Task CacheRedshiftColumns(List tables) 82 | { 83 | try 84 | { 85 | await _redshiftWriter.CacheTableColumnsAsync(tables); 86 | return true; 87 | } 88 | catch (Exception ex) 89 | { 90 | Console.WriteLine($"Failed getting Redshift column meta data. {ex}"); 91 | return false; 92 | } 93 | } 94 | 95 | private void Consume(CancellationToken token, BlockingCollection> accumulatedChanges, string topic, string table) 96 | { 97 | var conf = new Dictionary 98 | { 99 | { "group.id", $"{table}-consumer-group" }, 100 | { "bootstrap.servers", _kafkaBootstrapServers }, 101 | { "statistics.interval.ms", 60000 }, 102 | { "schema.registry.url", _schemaRegistryUrl } 103 | }; 104 | 105 | foreach (var confPair in conf) 106 | Console.WriteLine(topic + " - " + confPair.Key + ": " + confPair.Value); 107 | 108 | AvroTableTypeConverter avroTableTypeConverter = null; 109 | 110 | using (var consumer = new Consumer(conf, new StringDeserializer(Encoding.UTF8), new AvroDeserializer())) 111 | { 112 | consumer.OnError += (_, msg) 113 | => Console.WriteLine($"{topic} - Error: {msg.Reason}"); 114 | 115 | consumer.OnConsumeError += (_, msg) 116 | => Console.WriteLine($"{topic} - Consume error: {msg.Error.Reason}"); 117 | 118 | consumer.OnPartitionsAssigned += (_, partitions) => 119 | { 120 | Console.WriteLine($"{topic} - Assigned partitions: [{string.Join(", ", partitions)}], member id: {consumer.MemberId}"); 121 | consumer.Assign(partitions); 122 | }; 123 | 124 | consumer.OnPartitionsRevoked += (_, partitions) => 125 | { 126 | Console.WriteLine($"{topic} - Revoked partitions: [{string.Join(", ", partitions)}]"); 127 | consumer.Unassign(); 128 | }; 129 | 130 | Console.WriteLine($"Subscribing to topic {topic}"); 131 | consumer.Subscribe(topic); 132 | int 
secondsWithoutMessage = 0; 133 | 134 | while (!token.IsCancellationRequested) 135 | { 136 | Message msg = null; 137 | if (consumer.Consume(out msg, TimeSpan.FromSeconds(1))) 138 | { 139 | if (avroTableTypeConverter == null) 140 | avroTableTypeConverter = new AvroTableTypeConverter(msg.Value.Schema); 141 | else if (!avroTableTypeConverter.SchemaMatches(msg.Value.Schema)) 142 | avroTableTypeConverter = new AvroTableTypeConverter(msg.Value.Schema); 143 | 144 | AddToBuffer(consumer, msg, accumulatedChanges, avroTableTypeConverter); 145 | secondsWithoutMessage = 0; 146 | } 147 | else 148 | { 149 | secondsWithoutMessage++; 150 | if (secondsWithoutMessage % 30 == 0) 151 | Console.WriteLine($"{topic}: No messages in last {secondsWithoutMessage} seconds"); 152 | 153 | Task.Delay(100).Wait(); 154 | } 155 | } 156 | } 157 | 158 | accumulatedChanges.CompleteAdding(); // notifies consumers that no more messages will come 159 | } 160 | 161 | private void AddToBuffer(Consumer consumer, 162 | Message avroMessage, 163 | BlockingCollection> accumulatedChanges, 164 | AvroTableTypeConverter avroTableTypeConverter) 165 | { 166 | var tableChange = avroTableTypeConverter.GetRowChange(avroMessage.Value); 167 | var msg = new MessageProxy(consumer, avroMessage) { Payload = tableChange }; 168 | accumulatedChanges.Add(msg); 169 | } 170 | } 171 | } 172 | -------------------------------------------------------------------------------- /src/CdcTools.CdcToKafka.Streaming/Program.cs: -------------------------------------------------------------------------------- 1 | using CdcTools.CdcReader; 2 | using Microsoft.Extensions.Configuration; 3 | using System; 4 | using System.Collections.Generic; 5 | using System.IO; 6 | using System.Linq; 7 | using System.Runtime.Loader; 8 | using System.Threading; 9 | using System.Threading.Tasks; 10 | 11 | namespace CdcTools.CdcToKafka.Streaming 12 | { 13 | class Program 14 | { 15 | static void Main(string[] args) 16 | { 17 | Console.Title = "CDC To Kafka Streamer"; 
18 | 19 | // support graceful shutdown in Docker 20 | var ended = new ManualResetEventSlim(); 21 | var starting = new ManualResetEventSlim(); 22 | 23 | AssemblyLoadContext.Default.Unloading += ctx => 24 | { 25 | System.Console.WriteLine("Unloading fired"); 26 | starting.Set(); 27 | System.Console.WriteLine("Waiting for completion"); 28 | ended.Wait(); 29 | }; 30 | 31 | // set up configuration 32 | var builder = new ConfigurationBuilder() 33 | .SetBasePath(Directory.GetCurrentDirectory()) 34 | .AddJsonFile("appsettings.json", optional: true, reloadOnChange: true) 35 | .AddCommandLine(args) 36 | .AddEnvironmentVariables("CDCTOOLS_"); // all environment variables with this prefix; 37 | 38 | IConfigurationRoot configuration = builder.Build(); 39 | 40 | // get parameters and start 41 | var executionId = GetExecutionId(configuration); 42 | var runMode = GetRunMode(configuration); 43 | var tables = GetTables(configuration); 44 | var serializationMode = GetSerializationMode(configuration); 45 | var sendWithKey = GetSendWithKey(configuration); 46 | var batchSize = GetBatchSize(configuration); 47 | var kafkaBootstrapServers = GetBootstrapServers(configuration); 48 | var schemaRegistryUrl = GetSchemaRegistryUrl(configuration); 49 | var cdcReaderClient = new CdcReaderClient(configuration["DatabaseConnection"], configuration["StateManagmentConnection"]); 50 | var cts = new CancellationTokenSource(); 51 | 52 | if(runMode == RunMode.FullLoad) 53 | { 54 | var printMod = GetPrintMod(configuration); 55 | var fullLoadStreamer = new FullLoadStreamer(configuration, cdcReaderClient); 56 | fullLoadStreamer.StreamTablesAsync(cts.Token, executionId, tables, serializationMode, sendWithKey, batchSize, printMod).Wait(); 57 | Console.WriteLine("Streaming to Kafka in progress."); 58 | 59 | Thread.Sleep(2000); 60 | bool shutdown = false; 61 | // wait for shutdown signal 62 | #if DEBUG 63 | Console.WriteLine("Press any key to shutdown"); 64 | 65 | while (!shutdown) 66 | { 67 | if 
(Console.KeyAvailable) 68 | shutdown = true; 69 | else if (fullLoadStreamer.HasFinished()) 70 | shutdown = true; 71 | 72 | Thread.Sleep(500); 73 | } 74 | #else 75 | while (!shutdown) 76 | { 77 | if (starting.IsSet) 78 | shutdown = true; 79 | else if (fullLoadStreamer.HasFinished()) 80 | shutdown = true; 81 | 82 | Thread.Sleep(500); 83 | } 84 | #endif 85 | 86 | Console.WriteLine("Received signal gracefully shutting down"); 87 | cts.Cancel(); 88 | fullLoadStreamer.WaitForCompletion(); 89 | ended.Set(); 90 | } 91 | else 92 | { 93 | var interval = GetInterval(configuration); 94 | 95 | var cdcRequest = new CdcRequest() 96 | { 97 | BatchSize = batchSize, 98 | ExecutionId = executionId, 99 | Interval = interval, 100 | SendWithKey = sendWithKey, 101 | SerializationMode = serializationMode, 102 | Tables = tables 103 | }; 104 | var cdcStreamer = new ChangeStreamer(configuration, cdcReaderClient); 105 | cdcStreamer.StartReading(cts.Token, cdcRequest); 106 | Console.WriteLine("Streaming to Kafka started."); 107 | 108 | // wait for shutdown signal 109 | #if DEBUG 110 | Console.WriteLine("Press any key to shutdown"); 111 | Console.ReadKey(); 112 | #else 113 | starting.Wait(); 114 | #endif 115 | 116 | Console.WriteLine("Received signal gracefully shutting down"); 117 | cts.Cancel(); 118 | cdcStreamer.WaitForCompletion(); 119 | ended.Set(); 120 | } 121 | } 122 | 123 | private static string GetExecutionId(IConfiguration configuration) 124 | { 125 | if (configuration["ExecutionId"] == null) 126 | return Guid.NewGuid().ToString(); 127 | 128 | return configuration["ExecutionId"]; 129 | } 130 | 131 | private static RunMode GetRunMode(IConfiguration configuration) 132 | { 133 | var mode = configuration["Mode"]; 134 | if (mode != null) 135 | { 136 | if (mode.Equals("cdc-nontran")) 137 | return RunMode.NonTransactionalCdc; 138 | else if (mode.Equals("cdc-tran")) 139 | throw new NotSupportedException("cdc-tran mode is not supported at ths time."); 140 | else if (mode.Equals("full-load")) 
141 | return RunMode.FullLoad; 142 | } 143 | 144 | return RunMode.NonTransactionalCdc; 145 | } 146 | 147 | private static List GetTables(IConfiguration configuration) 148 | { 149 | if (configuration["Tables"] != null) 150 | { 151 | return configuration["Tables"].Split(',').ToList(); 152 | } 153 | else 154 | { 155 | return new List(); 156 | } 157 | } 158 | 159 | private static TimeSpan GetInterval(IConfiguration configuration) 160 | { 161 | return TimeSpan.FromMilliseconds(int.Parse(configuration["IntervalMs"])); 162 | } 163 | 164 | private static SerializationMode GetSerializationMode(IConfiguration configuration) 165 | { 166 | return (SerializationMode)Enum.Parse(typeof(SerializationMode), configuration["SerializationMode"]); 167 | } 168 | 169 | private static int GetBatchSize(IConfiguration configuration) 170 | { 171 | return int.Parse(configuration["BatchSize"]); 172 | } 173 | 174 | private static bool GetSendWithKey(IConfiguration configuration) 175 | { 176 | return bool.Parse(configuration["SendWithKey"]); 177 | } 178 | 179 | private static int GetPrintMod(IConfiguration configuration) 180 | { 181 | return int.Parse(configuration["PrintPercentProgressMod"]); 182 | } 183 | 184 | private static string GetBootstrapServers(IConfiguration configuration) 185 | { 186 | return configuration["KafkaBootstrapServers"]; 187 | } 188 | 189 | private static string GetSchemaRegistryUrl(IConfiguration configuration) 190 | { 191 | return configuration["KafkaSchemaRegistryUrl"]; 192 | } 193 | } 194 | } 195 | -------------------------------------------------------------------------------- /src/CdcTools.KafkaToRedshift/Consumers/NonKeyedAvroConsumer.cs: -------------------------------------------------------------------------------- 1 | using Avro.Generic; 2 | using CdcTools.KafkaToRedshift.Redshift; 3 | using CdcTools.KafkaToRedshift.Serialization; 4 | using CdcTools.Redshift; 5 | using CdcTools.Redshift.Changes; 6 | using CdcTools.Redshift.S3; 7 | using Confluent.Kafka; 8 | 
using Confluent.Kafka.Serialization; 9 | using Newtonsoft.Json; 10 | using System; 11 | using System.Collections.Concurrent; 12 | using System.Collections.Generic; 13 | using System.Linq; 14 | using System.Text; 15 | using System.Threading; 16 | using System.Threading.Tasks; 17 | 18 | namespace CdcTools.KafkaToRedshift.Consumers 19 | { 20 | public class NonKeyedAvroConsumer : IConsumer 21 | { 22 | private IRedshiftWriter _redshiftWriter; 23 | private List _consumerTasks; 24 | private List _redshiftTasks; 25 | private string _kafkaBootstrapServers; 26 | private string _schemaRegistryUrl; 27 | 28 | public NonKeyedAvroConsumer(IRedshiftWriter redshiftWriter, string kafkaBootstrapServers, string schemaRegistryUrl) 29 | { 30 | _redshiftWriter = redshiftWriter; 31 | _consumerTasks = new List(); 32 | _redshiftTasks = new List(); 33 | 34 | _kafkaBootstrapServers = kafkaBootstrapServers; 35 | _schemaRegistryUrl = schemaRegistryUrl; 36 | } 37 | 38 | public async Task StartConsumingAsync(CancellationToken token, TimeSpan windowSizePeriod, int windowSizeItems, List kafkaSources) 39 | { 40 | var columnsLoaded = await CacheRedshiftColumns(kafkaSources.Select(x => x.Table).ToList()); 41 | if (!columnsLoaded) 42 | return columnsLoaded; 43 | 44 | foreach (var kafkaSource in kafkaSources) 45 | { 46 | var accumulatedChanges = new BlockingCollection>(5000); 47 | _consumerTasks.Add(Task.Run(() => 48 | { 49 | try 50 | { 51 | Consume(token, accumulatedChanges, kafkaSource.Topic, kafkaSource.Table); 52 | } 53 | catch (Exception ex) 54 | { 55 | Console.WriteLine($"Consumer failure. Table: {kafkaSource.Table}. Error: {ex}"); 56 | } 57 | })); 58 | 59 | _redshiftTasks.Add(Task.Run(async () => 60 | { 61 | try 62 | { 63 | await _redshiftWriter.StartWritingAsync(token, windowSizePeriod, windowSizeItems, kafkaSource.Table, accumulatedChanges); 64 | } 65 | catch (Exception ex) 66 | { 67 | Console.WriteLine($"Redshift Writer failure. Table: {kafkaSource.Table}. 
Error: {ex}"); 68 | } 69 | })); 70 | } 71 | 72 | return columnsLoaded; 73 | } 74 | 75 | public void WaitForCompletion() 76 | { 77 | Task.WaitAll(_consumerTasks.ToArray()); 78 | Task.WaitAll(_redshiftTasks.ToArray()); 79 | } 80 | 81 | private async Task CacheRedshiftColumns(List tables) 82 | { 83 | try 84 | { 85 | await _redshiftWriter.CacheTableColumnsAsync(tables); 86 | return true; 87 | } 88 | catch (Exception ex) 89 | { 90 | Console.WriteLine($"Failed getting Redshift column meta data. {ex}"); 91 | return false; 92 | } 93 | } 94 | 95 | private void Consume(CancellationToken token, BlockingCollection> accumulatedChanges, string topic, string table) 96 | { 97 | var conf = new Dictionary 98 | { 99 | { "group.id", $"{table}-consumer-group" }, 100 | { "statistics.interval.ms", 60000 }, 101 | { "bootstrap.servers", _kafkaBootstrapServers }, 102 | { "schema.registry.url", _schemaRegistryUrl } 103 | }; 104 | 105 | foreach (var confPair in conf) 106 | Console.WriteLine(topic + " - " + confPair.Key + ": " + confPair.Value); 107 | 108 | AvroTableTypeConverter avroTableTypeConverter = null; 109 | 110 | using (var consumer = new Consumer(conf, null, new AvroDeserializer())) 111 | { 112 | //consumer.OnPartitionEOF += (_, end) 113 | // => Console.WriteLine($"Reached end of topic {end.Topic} partition {end.Partition}, next message will be at offset {end.Offset}"); 114 | 115 | consumer.OnError += (_, msg) 116 | => Console.WriteLine($"{topic} - Error: {msg.Reason}"); 117 | 118 | consumer.OnConsumeError += (_, msg) 119 | => Console.WriteLine($"{topic} - Consume error: {msg.Error.Reason}"); 120 | 121 | consumer.OnPartitionsAssigned += (_, partitions) => 122 | { 123 | Console.WriteLine($"{topic} - Assigned partitions: [{string.Join(", ", partitions)}], member id: {consumer.MemberId}"); 124 | consumer.Assign(partitions); 125 | }; 126 | 127 | consumer.OnPartitionsRevoked += (_, partitions) => 128 | { 129 | Console.WriteLine($"{topic} - Revoked partitions: [{string.Join(", ", 
partitions)}]"); 130 | consumer.Unassign(); 131 | }; 132 | 133 | //consumer.OnStatistics += (_, json) 134 | // => Console.WriteLine($"{topic} - Statistics: {json}"); 135 | 136 | Console.WriteLine($"Subscribing to topic {topic}"); 137 | consumer.Subscribe(topic); 138 | int secondsWithoutMessage = 0; 139 | 140 | while (!token.IsCancellationRequested) 141 | { 142 | Message msg = null; 143 | if (consumer.Consume(out msg, TimeSpan.FromSeconds(1))) 144 | { 145 | if (avroTableTypeConverter == null) 146 | avroTableTypeConverter = new AvroTableTypeConverter(msg.Value.Schema); 147 | else if (!avroTableTypeConverter.SchemaMatches(msg.Value.Schema)) 148 | avroTableTypeConverter = new AvroTableTypeConverter(msg.Value.Schema); 149 | 150 | AddToBuffer(consumer, msg, accumulatedChanges, avroTableTypeConverter); 151 | secondsWithoutMessage = 0; 152 | } 153 | else 154 | { 155 | secondsWithoutMessage++; 156 | if (secondsWithoutMessage % 30 == 0) 157 | Console.WriteLine($"{topic}: No messages in last {secondsWithoutMessage} seconds"); 158 | 159 | Task.Delay(100).Wait(); 160 | } 161 | 162 | } 163 | } 164 | 165 | accumulatedChanges.CompleteAdding(); // notifies consumers that no more messages will come 166 | } 167 | 168 | private void AddToBuffer(Consumer consumer, 169 | Message avroMessage, 170 | BlockingCollection> accumulatedChanges, 171 | AvroTableTypeConverter avroTableTypeConverter) 172 | { 173 | var tableChange = avroTableTypeConverter.GetRowChange(avroMessage.Value); 174 | var msg = new MessageProxy(consumer, avroMessage) { Payload = tableChange }; 175 | accumulatedChanges.Add(msg); 176 | } 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /src/CdcTools.CdcToRedshift/Transactional/TransactionExporter.cs: -------------------------------------------------------------------------------- 1 | using CdcTools.CdcReader.Changes; 2 | using CdcTools.CdcReader.Transactional; 3 | using CdcTools.CdcReader.Transactional.State; 4 | using 
CdcTools.Redshift; 5 | using CdcTools.Redshift.Changes; 6 | using System; 7 | using System.Collections.Generic; 8 | using System.Diagnostics; 9 | using System.Linq; 10 | using System.Text; 11 | using System.Threading; 12 | using System.Threading.Tasks; 13 | 14 | namespace CdcTools.CdcToRedshift.Transactional 15 | { 16 | public class TransactionExporter 17 | { 18 | private Task _exporterTask; 19 | private CdcTransactionClient _cdcTransactionClient; 20 | private RedshiftClient _redshiftClient; 21 | 22 | public TransactionExporter(CdcTransactionClient cdcTransactionClient, 23 | RedshiftClient redshiftClient) 24 | { 25 | _cdcTransactionClient = cdcTransactionClient; 26 | _redshiftClient = redshiftClient; 27 | } 28 | 29 | public async Task StartExportingChangesAsync(CancellationToken token, 30 | string executionId, 31 | List tables, 32 | TimeSpan interval, 33 | int perTableBufferLimit, 34 | int transactionBufferLimit, 35 | int transactionBatchSizeLimit) 36 | { 37 | await _redshiftClient.CacheTableColumnsAsync(tables); 38 | 39 | _exporterTask = Task.Run(async () => 40 | { 41 | while (!token.IsCancellationRequested) 42 | { 43 | try 44 | { 45 | await StartExportingAsync(token, 46 | executionId, 47 | tables, 48 | interval, 49 | perTableBufferLimit, 50 | transactionBufferLimit, 51 | transactionBatchSizeLimit); 52 | } 53 | catch (Exception ex) 54 | { 55 | Console.WriteLine($"Transaction reader failure. Will restart in 30 seconds. 
Error: {ex}"); 56 | await WaitForSeconds(token, 30); 57 | } 58 | } 59 | }); 60 | } 61 | 62 | public void WaitForCompletion() 63 | { 64 | _exporterTask.Wait(); 65 | } 66 | 67 | private async Task StartExportingAsync(CancellationToken token, 68 | string executionId, 69 | List tables, 70 | TimeSpan interval, 71 | int perTableBufferLimit, 72 | int transactionBufferLimit, 73 | int transactionBatchSizeLimit) 74 | { 75 | var lastTran = await _cdcTransactionClient.GetLastTransactionIdAsync(executionId); 76 | if (lastTran.Result == Result.NoStoredTransationId) 77 | await _cdcTransactionClient.StartAsync(tables, perTableBufferLimit, transactionBufferLimit, transactionBatchSizeLimit); 78 | else 79 | await _cdcTransactionClient.StartAsync(tables, perTableBufferLimit, transactionBufferLimit, transactionBatchSizeLimit, lastTran.State.Lsn); 80 | 81 | string uncommittedLsn = string.Empty; 82 | bool haveUncommitedParts = false; 83 | 84 | while (!token.IsCancellationRequested) 85 | { 86 | var batches = new List(); 87 | var sw = new Stopwatch(); 88 | sw.Start(); 89 | 90 | while (sw.Elapsed <= interval && !token.IsCancellationRequested) 91 | { 92 | var transactionBatch = await _cdcTransactionClient.NextAsync(token, interval); 93 | if (transactionBatch != null) 94 | { 95 | // if we have uncommitted multi-part transactions and the latest transaction is not the uncommitted one 96 | // then we need to commit it now before continuing 97 | if(haveUncommitedParts && !uncommittedLsn.Equals(transactionBatch.Id.LsnStr)) 98 | { 99 | await _redshiftClient.CommitMultiplePartsAsync(uncommittedLsn); 100 | haveUncommitedParts = false; 101 | uncommittedLsn = string.Empty; 102 | } 103 | 104 | if (transactionBatch.IsMultiPart) 105 | { 106 | // if this is a multi-part transaction then trigger the upload of any accumulated transactions before starting to process the multi-part transaction 107 | if(!haveUncommitedParts && batches.Any()) 108 | { 109 | await UploadBatchesAsync(batches); 110 | batches = new 
List(); 111 | sw.Reset(); 112 | sw.Start(); 113 | } 114 | 115 | foreach (var tableGroup in transactionBatch.Changes.GroupBy(x => x.TableName)) 116 | { 117 | var orderedTableChanges = tableGroup.OrderBy(x => x.LsnInt).ThenBy(x => x.SeqValInt).ToList(); 118 | var rowChanges = ConvertToRowChanges(orderedTableChanges); 119 | await _redshiftClient.StorePartAsCsvAsync(transactionBatch.Id.LsnStr, tableGroup.Key, transactionBatch.Part, rowChanges); 120 | } 121 | 122 | haveUncommitedParts = true; 123 | } 124 | else 125 | { 126 | batches.Add(transactionBatch); 127 | } 128 | } 129 | } 130 | 131 | if (batches.Any()) 132 | { 133 | // upload accumulated non multi-part batches to Redshift 134 | Console.WriteLine($"Uploading {batches.Count} transactions with a total of {batches.SelectMany(x => x.Changes).Count()} changes"); 135 | await UploadBatchesAsync(batches); 136 | 137 | // store our highest uploaded current transaction id in our state store 138 | await _cdcTransactionClient.StoreTransactionIdAsync(executionId, batches.Last().Id); 139 | } 140 | } 141 | 142 | _cdcTransactionClient.Stop(); 143 | } 144 | 145 | private async Task UploadBatchesAsync(List batches) 146 | { 147 | var tableRowChanges = new Dictionary>(); 148 | foreach(var batch in batches) 149 | { 150 | foreach(var tableGroup in batch.Changes.GroupBy(x => x.TableName)) 151 | { 152 | var orderedChanges = tableGroup.OrderBy(x => x.LsnInt).ThenBy(x => x.SeqValInt).ToList(); 153 | if (!tableRowChanges.ContainsKey(tableGroup.Key)) 154 | tableRowChanges.Add(tableGroup.Key, new List()); 155 | 156 | tableRowChanges[tableGroup.Key].AddRange(ConvertToRowChanges(orderedChanges)); 157 | } 158 | } 159 | 160 | await _redshiftClient.UploadAsCsvAsync(tableRowChanges); 161 | } 162 | 163 | private List ConvertToRowChanges(List changeRecords) 164 | { 165 | return changeRecords.Select(x => new RowChange() 166 | { 167 | ChangeKey = x.ChangeKey, 168 | ChangeType = (Redshift.Changes.ChangeType)x.ChangeType, 169 | Data = x.Data, 170 | Lsn = 
x.LsnStr, 171 | SeqVal = x.SeqValStr 172 | }).ToList(); 173 | } 174 | 175 | private async Task WaitForSeconds(CancellationToken token, int seconds) 176 | { 177 | int waited = 0; 178 | 179 | while (waited < seconds && !token.IsCancellationRequested) 180 | { 181 | await Task.Delay(1000); 182 | waited++; 183 | } 184 | } 185 | } 186 | } 187 | -------------------------------------------------------------------------------- /src/CdcTools.CdcToRedshift/FullLoadExporter.cs: -------------------------------------------------------------------------------- 1 | using CdcTools.CdcReader; 2 | using CdcTools.CdcReader.Changes; 3 | using CdcTools.CdcReader.State; 4 | using CdcTools.CdcReader.Tables; 5 | using CdcTools.Redshift; 6 | using CdcTools.Redshift.Changes; 7 | using Microsoft.Extensions.Configuration; 8 | using System; 9 | using System.Collections.Generic; 10 | using System.Linq; 11 | using System.Text; 12 | using System.Threading; 13 | using System.Threading.Tasks; 14 | 15 | namespace CdcTools.CdcToRedshift 16 | { 17 | public class FullLoadExporter 18 | { 19 | private List _loadTasks; 20 | private CdcReaderClient _cdcReaderClient; 21 | private RedshiftClient _redshiftClient; 22 | 23 | public FullLoadExporter(CdcReaderClient cdcReaderClient, RedshiftClient redshiftClient) 24 | { 25 | _cdcReaderClient = cdcReaderClient; 26 | _redshiftClient = redshiftClient; 27 | _loadTasks = new List(); 28 | } 29 | 30 | public async Task ExportTablesAsync(CancellationToken token, 31 | string executionId, 32 | List tables, 33 | int batchSize, 34 | int printMod) 35 | { 36 | await _redshiftClient.CacheTableColumnsAsync(tables); 37 | 38 | foreach (var tableName in tables) 39 | { 40 | var tableSchema = await _cdcReaderClient.GetTableSchemaAsync(tableName); 41 | _loadTasks.Add(Task.Run(async () => 42 | { 43 | try 44 | { 45 | await ExportTableAsync(token, 46 | executionId, 47 | tableSchema, 48 | batchSize, 49 | printMod); 50 | } 51 | catch (Exception ex) 52 | { 53 | Console.WriteLine(ex); 54 | } 
55 | })); 56 | } 57 | } 58 | 59 | public void WaitForCompletion() 60 | { 61 | Task.WaitAll(_loadTasks.ToArray()); 62 | } 63 | 64 | public bool HasFinished() 65 | { 66 | return _loadTasks.All(x => x.IsCompleted); 67 | } 68 | 69 | private async Task ExportTableAsync(CancellationToken token, 70 | string executionId, 71 | TableSchema tableSchema, 72 | int batchSize, 73 | int printPercentProgressMod) 74 | { 75 | var rowCount = await _cdcReaderClient.GetRowCountAsync(tableSchema); 76 | Console.WriteLine($"Table {tableSchema.TableName} - {rowCount} rows to export"); 77 | int progress = 0; 78 | 79 | PrimaryKeyValue lastRetrievedKey = await SetStartingPosition(executionId, tableSchema, batchSize); 80 | long ctr = batchSize; 81 | bool finished = false; 82 | 83 | while (!token.IsCancellationRequested && !finished) 84 | { 85 | var changes = new List(); 86 | 87 | var batch = await _cdcReaderClient.GetBatchAsync(tableSchema, lastRetrievedKey, batchSize); 88 | var result = await WriteToRedshiftAsync(batch, ctr); 89 | if (result.Item1) 90 | { 91 | ctr = result.Item2; 92 | int latestProgress = (int)(((double)ctr / (double)rowCount) * 100); 93 | if (progress != latestProgress && latestProgress % printPercentProgressMod == 0) 94 | Console.WriteLine($"Table {tableSchema.TableName} - Progress at {latestProgress}% ({ctr} records)"); 95 | 96 | progress = latestProgress; 97 | lastRetrievedKey = batch.LastRowKey; 98 | if(batch.Records.Any()) 99 | await _cdcReaderClient.StoreFullLoadOffsetAsync(executionId, tableSchema.TableName, lastRetrievedKey); 100 | 101 | if (!batch.Records.Any() || batch.Records.Count < batchSize) 102 | finished = true; 103 | } 104 | else 105 | { 106 | Console.WriteLine($"Table {tableSchema.TableName} - Failed to upload to Redshift. 
Will try again in 10 seconds."); 107 | await WaitForSeconds(token, 10); 108 | } 109 | } 110 | 111 | if (token.IsCancellationRequested) 112 | Console.WriteLine($"Table {tableSchema.Schema}.{tableSchema.TableName} - cancelled at progress at {progress}% ({ctr} records)"); 113 | else 114 | Console.WriteLine($"Table {tableSchema.Schema}.{tableSchema.TableName} - complete ({ctr} records)"); 115 | } 116 | 117 | private async Task SetStartingPosition(string executionId, TableSchema tableSchema, int batchSize) 118 | { 119 | PrimaryKeyValue lastRetrievedKey = null; 120 | long ctr = 0; 121 | var existingOffsetResult = await _cdcReaderClient.GetLastFullLoadOffsetAsync(executionId, tableSchema.TableName); 122 | if (existingOffsetResult.Result == Result.NoStoredState) 123 | { 124 | Console.WriteLine($"Table {tableSchema.TableName} - No previous stored offset. Starting from first row"); 125 | var firstBatch = await _cdcReaderClient.GetFirstBatchAsync(tableSchema, batchSize); 126 | if (firstBatch.Records.Any()) 127 | { 128 | lastRetrievedKey = firstBatch.LastRowKey; 129 | var result = await WriteToRedshiftAsync(firstBatch, ctr); 130 | if (!result.Item1) 131 | { 132 | Console.WriteLine($"Table {tableSchema.TableName} - Export aborted"); 133 | return null; 134 | } 135 | 136 | await _cdcReaderClient.StoreFullLoadOffsetAsync(executionId, tableSchema.TableName, lastRetrievedKey); 137 | 138 | ctr = result.Item2; 139 | Console.WriteLine($"Table {tableSchema.TableName} - Written first batch to Redshift"); 140 | } 141 | else 142 | { 143 | Console.WriteLine($"Table {tableSchema.TableName} - No data to export"); 144 | return null; 145 | } 146 | } 147 | else 148 | { 149 | Console.WriteLine($"Table {tableSchema.TableName} - Starting from stored offset"); 150 | lastRetrievedKey = existingOffsetResult.State; 151 | } 152 | 153 | return lastRetrievedKey; 154 | } 155 | 156 | private async Task> WriteToRedshiftAsync(FullLoadBatch batch, long ctr) 157 | { 158 | if (batch.Records.Any()) 159 | { 160 | 
var rowChanges = new List(); 161 | foreach (var record in batch.Records) 162 | { 163 | rowChanges.Add(new RowChange() 164 | { 165 | ChangeKey = record.ChangeKey, 166 | ChangeType = CdcTools.Redshift.Changes.ChangeType.INSERT, 167 | Data = record.Data, 168 | Lsn = ctr.ToString(), 169 | SeqVal = ctr.ToString() 170 | }); 171 | ctr++; 172 | } 173 | 174 | try 175 | { 176 | await _redshiftClient.UploadAsCsvAsync(batch.TableSchema.TableName, rowChanges); 177 | return Tuple.Create(true, ctr); 178 | } 179 | catch(Exception ex) 180 | { 181 | Console.WriteLine($"{batch.TableSchema.TableName} upload failed. {ex}"); 182 | return Tuple.Create(false, ctr); 183 | } 184 | } 185 | 186 | return Tuple.Create(true, ctr); 187 | } 188 | 189 | private async Task WaitForSeconds(CancellationToken token, int seconds) 190 | { 191 | int waited = 0; 192 | 193 | while (waited < seconds && !token.IsCancellationRequested) 194 | { 195 | await Task.Delay(1000); 196 | waited++; 197 | } 198 | } 199 | } 200 | } 201 | -------------------------------------------------------------------------------- /src/CdcTools.CdcReader/Tables/FullLoadRepository.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Text; 4 | using CdcTools.CdcReader.Changes; 5 | using System.Data.SqlClient; 6 | using System.Threading.Tasks; 7 | using System.Linq; 8 | using System.Data; 9 | 10 | namespace CdcTools.CdcReader.Tables 11 | { 12 | public class FullLoadRepository : IFullLoadRepository 13 | { 14 | private string _connString; 15 | 16 | public FullLoadRepository(string connectionString) 17 | { 18 | _connString = connectionString; 19 | } 20 | 21 | public async Task GetRowCountAsync(TableSchema tableSchema) 22 | { 23 | using (var conn = await GetOpenConnectionAsync()) 24 | { 25 | var command = conn.CreateCommand(); 26 | command.CommandText = $"SELECT COUNT(*) FROM {tableSchema.Schema}.{tableSchema.TableName}"; 27 | return (int) 
await command.ExecuteScalarAsync(); 28 | } 29 | } 30 | 31 | public async Task GetFirstBatchAsync(TableSchema tableSchema, int batchSize) 32 | { 33 | var batch = new FullLoadBatch(); 34 | batch.TableSchema = tableSchema; 35 | 36 | using (var conn = await GetOpenConnectionAsync()) 37 | { 38 | var command = conn.CreateCommand(); 39 | command.CommandText = $"SELECT TOP {batchSize} * FROM {tableSchema.Schema}.{tableSchema.TableName} ORDER BY {tableSchema.GetOrderedPrimaryKeyColumns()};"; 40 | 41 | using (var reader = await command.ExecuteReaderAsync()) 42 | { 43 | int ctr = 1; 44 | while (await reader.ReadAsync()) 45 | { 46 | var change = new FullLoadRecord(); 47 | change.ChangeKey = GetRecordId(reader, tableSchema); 48 | change.BatchSeqNo = ctr; 49 | 50 | foreach (var column in tableSchema.Columns) 51 | change.Data.Add(column.Name, reader[column.Name]); 52 | 53 | batch.Records.Add(change); 54 | ctr++; 55 | } 56 | } 57 | } 58 | 59 | if (batch.Records.Any()) 60 | { 61 | batch.FirstRowKey = GetKey(batch.Records.First(), tableSchema); 62 | batch.LastRowKey = GetKey(batch.Records.Last(), tableSchema); 63 | } 64 | 65 | return batch; 66 | } 67 | 68 | public async Task GetBatchAsync(TableSchema tableSchema, PrimaryKeyValue lastRetrievedKey, int batchSize) 69 | { 70 | var batch = new FullLoadBatch(); 71 | batch.TableSchema = tableSchema; 72 | 73 | using (var conn = await GetOpenConnectionAsync()) 74 | { 75 | var command = conn.CreateCommand(); 76 | command.CommandText = TableSchemaQueryBuilder.GetExtractQueryUsingAllKeys(tableSchema, batchSize); 77 | 78 | foreach(var pk in tableSchema.PrimaryKeys.OrderBy(x => x.OrdinalPosition)) 79 | { 80 | var columnSchema = tableSchema.GetColumn(pk.ColumnName); 81 | var value = lastRetrievedKey.GetValue(pk.OrdinalPosition); 82 | command.Parameters.Add(CreateSqlParameter(columnSchema, "@p"+pk.OrdinalPosition, value)); 83 | } 84 | 85 | using (var reader = await command.ExecuteReaderAsync()) 86 | { 87 | int ctr = 1; 88 | while (await 
reader.ReadAsync()) 89 | { 90 | var change = new FullLoadRecord(); 91 | change.ChangeKey = GetRecordId(reader, tableSchema); 92 | change.BatchSeqNo = ctr; 93 | 94 | foreach (var column in tableSchema.Columns) 95 | change.Data.Add(column.Name, reader[column.Name]); 96 | 97 | batch.Records.Add(change); 98 | ctr++; 99 | } 100 | } 101 | } 102 | 103 | if (batch.Records.Any()) 104 | { 105 | batch.FirstRowKey = GetKey(batch.Records.First(), tableSchema); 106 | batch.LastRowKey = GetKey(batch.Records.Last(), tableSchema); 107 | } 108 | 109 | return batch; 110 | } 111 | 112 | private PrimaryKeyValue GetKey(FullLoadRecord record, TableSchema tableSchema) 113 | { 114 | var pkVal = new PrimaryKeyValue(); 115 | 116 | foreach (var pkCol in tableSchema.PrimaryKeys) 117 | pkVal.AddKeyValue(pkCol.OrdinalPosition, pkCol.ColumnName, record.Data[pkCol.ColumnName]); 118 | 119 | return pkVal; 120 | } 121 | 122 | private string GetRecordId(SqlDataReader reader, TableSchema tableSchema) 123 | { 124 | if (tableSchema.PrimaryKeys.Count == 1) 125 | return reader[tableSchema.PrimaryKeys.First().ColumnName].ToString(); 126 | 127 | var recordIdSb = new StringBuilder(); 128 | var pkCtr = 0; 129 | foreach (var pkCol in tableSchema.PrimaryKeys.OrderBy(x => x.OrdinalPosition)) 130 | { 131 | if (pkCtr > 0) 132 | recordIdSb.Append("|"); 133 | 134 | recordIdSb.Append(reader[pkCol.ColumnName].ToString()); 135 | pkCtr++; 136 | } 137 | 138 | return recordIdSb.ToString(); 139 | } 140 | 141 | private async Task GetOpenConnectionAsync() 142 | { 143 | var conn = new SqlConnection(_connString); 144 | await conn.OpenAsync(); 145 | 146 | return conn; 147 | } 148 | 149 | private SqlParameter CreateSqlParameter(ColumnSchema column, string parameterName, object value) 150 | { 151 | SqlParameter parameter = null; 152 | switch (column.DataType) 153 | { 154 | case "char": 155 | parameter = new SqlParameter(parameterName, SqlDbType.Char, column.MaxCharsLength); 156 | break; 157 | case "varchar": 158 | parameter = new 
SqlParameter(parameterName, SqlDbType.VarChar, column.MaxCharsLength); 159 | break; 160 | case "nvarchar": 161 | parameter = new SqlParameter(parameterName, SqlDbType.NVarChar, column.MaxCharsLength); 162 | break; 163 | case "tinyint": 164 | parameter = new SqlParameter(parameterName, SqlDbType.TinyInt); 165 | break; 166 | case "smallint": 167 | parameter = new SqlParameter(parameterName, SqlDbType.SmallInt); 168 | break; 169 | case "int": 170 | parameter = new SqlParameter(parameterName, SqlDbType.Int); 171 | break; 172 | case "bigint": 173 | parameter = new SqlParameter(parameterName, SqlDbType.BigInt); 174 | break; 175 | case "date": 176 | parameter = new SqlParameter(parameterName, SqlDbType.Date); 177 | break; 178 | case "datetime": 179 | parameter = new SqlParameter(parameterName, SqlDbType.DateTime); 180 | break; 181 | case "datetime2": 182 | parameter = new SqlParameter(parameterName, SqlDbType.DateTime2); 183 | break; 184 | case "time": 185 | parameter = new SqlParameter(parameterName, SqlDbType.Time); 186 | break; 187 | case "bit": 188 | parameter = new SqlParameter(parameterName, SqlDbType.Bit); 189 | break; 190 | case "money": 191 | parameter = new SqlParameter(parameterName, SqlDbType.Money); 192 | break; 193 | case "uniqueidentifier": 194 | parameter = new SqlParameter(parameterName, SqlDbType.UniqueIdentifier); 195 | break; 196 | case "varbinary": 197 | parameter = new SqlParameter(parameterName, SqlDbType.VarBinary); 198 | break; 199 | default: 200 | throw new Exception("SQL data type not supported: " + column.DataType); 201 | } 202 | 203 | parameter.Value = value; 204 | 205 | return parameter; 206 | } 207 | } 208 | } 209 | --------------------------------------------------------------------------------