├── logo.png ├── docs ├── favicon.ico ├── Gemfile ├── assets │ └── images │ │ └── logo.png ├── features │ ├── index.md │ ├── bulkwriter.md │ └── etlpipeline.md ├── gettingstarted.md ├── examples │ ├── index.md │ ├── advanced-pipelining.md │ ├── relative-distances.md │ └── pipelining.md ├── 404.html ├── index.md ├── motivation.md ├── _config.yml ├── bulkinserting.md ├── _sass │ └── custom │ │ └── custom.scss └── pipelining.md ├── src ├── BulkWriter.Tests │ ├── Properties │ │ └── AssemblyInfo.cs │ ├── BulkWriter.Tests.csproj │ ├── BulkWriterAsyncTests.cs │ ├── BulkWriterAsyncEnumerableTests.cs │ ├── DbContainerFixture.cs │ ├── TypeExtensionsTests.cs │ ├── PropertyInfoExtensionsTests.cs │ ├── BulkWriterInitializationTests.cs │ ├── BulkWriterTests.cs │ ├── EnumerableDataReaderTests.cs │ └── AsyncEnumerableDataReaderTests.cs ├── BulkWriter │ ├── Internal │ │ ├── GetPropertyValueHandler.cs │ │ ├── MappingSource.cs │ │ ├── PropertyMapping.cs │ │ ├── MappingDestination.cs │ │ ├── PropertyMappingExtensions.cs │ │ ├── PropertyInfoExtensions.cs │ │ ├── AsyncEnumerableDataReader.cs │ │ ├── TypeExtensions.cs │ │ └── EnumerableDataReader.cs │ ├── Pipeline │ │ ├── Internal │ │ │ ├── IEtlPipelineStep.cs │ │ │ ├── BulkWriterEtlPipelineStep.cs │ │ │ ├── EtlPipelineContext.cs │ │ │ ├── StartEtlPipelineStep.cs │ │ │ ├── AsyncStartEtlPipelineStep.cs │ │ │ ├── AggregateEtlPipelineStep.cs │ │ │ ├── ProjectEtlPipelineStep.cs │ │ │ ├── TransformEtlPipelineStep.cs │ │ │ ├── PivotEtlPipelineStep.cs │ │ │ └── EtlPipelineStep.cs │ │ ├── Transforms │ │ │ ├── ITransformer.cs │ │ │ ├── IProjector.cs │ │ │ ├── IPivot.cs │ │ │ └── IAggregator.cs │ │ ├── IEtlPipeline.cs │ │ ├── EtlPipeline.cs │ │ └── Steps │ │ │ └── IEtlPipelineStep.cs │ ├── Properties │ │ ├── AssemblyInfo.cs │ │ ├── Resources.resx │ │ └── Resources.Designer.cs │ ├── IBulkWriter.cs │ ├── BulkWriter.csproj │ └── BulkWriter.cs ├── BulkWriter.Benchmark │ ├── Program.cs │ ├── DomainEntity.cs │ ├── BulkWriter.Benchmark.csproj │ ├── 
Benchmarks │ │ ├── BenchmarkBaseClass.cs │ │ └── BulkWriterBenchmark.cs │ ├── DataGenerationHelpers.cs │ └── DbHelpers.cs └── BulkWriter.Demo │ ├── BulkWriter.Demo.csproj │ ├── MyDomainEntity.cs │ └── Program.cs ├── .config └── dotnet-tools.json ├── setup.ps1 ├── Directory.Build.targets ├── Push.ps1 ├── .github └── workflows │ ├── ci.yml │ ├── release.yml │ ├── devskim.yml │ └── triage-issues.yml ├── LICENSE ├── Build.ps1 ├── README.md ├── .gitignore ├── CONTRIBUTING.md ├── BulkWriter.sln └── .editorconfig /logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jbogard/bulk-writer/HEAD/logo.png -------------------------------------------------------------------------------- /docs/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jbogard/bulk-writer/HEAD/docs/favicon.ico -------------------------------------------------------------------------------- /docs/Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | gem 'github-pages', group: :jekyll_plugins 3 | -------------------------------------------------------------------------------- /docs/assets/images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jbogard/bulk-writer/HEAD/docs/assets/images/logo.png -------------------------------------------------------------------------------- /src/BulkWriter.Tests/Properties/AssemblyInfo.cs: -------------------------------------------------------------------------------- 1 | [assembly: Xunit.CollectionBehavior(DisableTestParallelization = true)] -------------------------------------------------------------------------------- /src/BulkWriter/Internal/GetPropertyValueHandler.cs: -------------------------------------------------------------------------------- 1 | namespace 
BulkWriter.Internal 2 | { 3 | internal delegate object GetPropertyValueHandler(object instance); 4 | } -------------------------------------------------------------------------------- /docs/features/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Features 4 | has_children: true 5 | nav_order: 2 6 | --- 7 | # Features 8 | 9 | *Bulk Writer* contains a couple of features to aid in creating your ETL processes. 10 | -------------------------------------------------------------------------------- /.config/dotnet-tools.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": 1, 3 | "isRoot": true, 4 | "tools": { 5 | "gitversion.tool": { 6 | "version": "5.1.2", 7 | "commands": [ 8 | "dotnet-gitversion" 9 | ] 10 | } 11 | } 12 | } -------------------------------------------------------------------------------- /docs/gettingstarted.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Getting Started 4 | parent: Overview 5 | nav_order: 1 6 | --- 7 | # Getting Started 8 | 9 | Install the *Bulk Writer* library via NuGet with the following command: 10 | 11 | ```Install-Package BulkWriter``` 12 | -------------------------------------------------------------------------------- /src/BulkWriter/Internal/MappingSource.cs: -------------------------------------------------------------------------------- 1 | using System.Reflection; 2 | 3 | namespace BulkWriter.Internal 4 | { 5 | internal class MappingSource 6 | { 7 | public PropertyInfo Property { get; set; } 8 | 9 | public int Ordinal { get; set; } 10 | } 11 | } -------------------------------------------------------------------------------- /src/BulkWriter/Pipeline/Internal/IEtlPipelineStep.cs: -------------------------------------------------------------------------------- 1 | using System.Threading; 2 | using System.Threading.Tasks; 
3 | 4 | namespace BulkWriter.Pipeline.Internal 5 | { 6 | internal interface IEtlPipelineStep 7 | { 8 | Task Run(CancellationToken cancellationToken); 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /src/BulkWriter/Internal/PropertyMapping.cs: -------------------------------------------------------------------------------- 1 | namespace BulkWriter.Internal 2 | { 3 | internal class PropertyMapping 4 | { 5 | public bool ShouldMap { get; set; } 6 | 7 | public MappingSource Source { get; set; } 8 | 9 | public MappingDestination Destination { get; set; } 10 | } 11 | } -------------------------------------------------------------------------------- /src/BulkWriter.Benchmark/Program.cs: -------------------------------------------------------------------------------- 1 | using BenchmarkDotNet.Running; 2 | 3 | namespace BulkWriter.Benchmark 4 | { 5 | class Program 6 | { 7 | static void Main(string[] args) 8 | { 9 | DbHelpers.SetupDb(); 10 | BenchmarkSwitcher.FromAssembly(typeof(Program).Assembly).Run(args); 11 | } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /setup.ps1: -------------------------------------------------------------------------------- 1 | # set PSGallery as trusted so we can install packages from there 2 | Write-Host 'Trusting PS Gallery' 3 | Set-PSRepository -Name "PSGallery" -InstallationPolicy Trusted 4 | 5 | # Install PSAKE 6 | Write-Host 'Installing PSake' 7 | Install-Module -Name psake -Scope CurrentUser 8 | 9 | # Install dotnet based tools (requires a manifest) 10 | Write-Host 'Install dotnet tools' 11 | dotnet tool restore -------------------------------------------------------------------------------- /Directory.Build.targets: -------------------------------------------------------------------------------- 1 | 2 | 3 | 
$([System.IO.Path]::Combine('$(IntermediateOutputPath)','$(TargetFrameworkMoniker).AssemblyAttributes$(DefaultLanguageSourceExtension)')) 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /src/BulkWriter/Internal/MappingDestination.cs: -------------------------------------------------------------------------------- 1 | namespace BulkWriter.Internal 2 | { 3 | internal class MappingDestination 4 | { 5 | public string ColumnName { get; set; } 6 | 7 | public int ColumnOrdinal { get; set; } 8 | 9 | public int ColumnSize { get; set; } 10 | 11 | public string DataTypeName { get; set; } 12 | 13 | public bool IsKey { get; set; } 14 | } 15 | } -------------------------------------------------------------------------------- /src/BulkWriter/Pipeline/Transforms/ITransformer.cs: -------------------------------------------------------------------------------- 1 | namespace BulkWriter.Pipeline.Transforms 2 | { 3 | public interface ITransformer 4 | { 5 | /// 6 | /// Transforms an input object in-place (i.e. 
via side-effects) 7 | /// 8 | /// Input object modified by the transform 9 | void Transform(TOut input); 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /src/BulkWriter.Demo/BulkWriter.Demo.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | net10.0 4 | latest 5 | 6 | Exe 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /src/BulkWriter.Demo/MyDomainEntity.cs: -------------------------------------------------------------------------------- 1 | using System.ComponentModel.DataAnnotations; 2 | using System.ComponentModel.DataAnnotations.Schema; 3 | 4 | namespace BulkWriter.Demo 5 | { 6 | [Table("MyDomainEntities")] 7 | public class MyDomainEntity 8 | { 9 | [Key] 10 | public int Id { get; set; } 11 | 12 | public string FirstName { get; set; } 13 | 14 | public string LastName { get; set; } 15 | } 16 | } -------------------------------------------------------------------------------- /src/BulkWriter/Pipeline/Transforms/IProjector.cs: -------------------------------------------------------------------------------- 1 | namespace BulkWriter.Pipeline.Transforms 2 | { 3 | public interface IProjector 4 | { 5 | /// 6 | /// Projects the input object to a new object type 7 | /// 8 | /// Input object to be projected 9 | /// Projected object 10 | TOut ProjectTo(TIn input); 11 | } 12 | } -------------------------------------------------------------------------------- /src/BulkWriter.Benchmark/DomainEntity.cs: -------------------------------------------------------------------------------- 1 | using System.ComponentModel.DataAnnotations; 2 | using System.ComponentModel.DataAnnotations.Schema; 3 | 4 | namespace BulkWriter.Benchmark 5 | { 6 | [Table("DomainEntities")] 7 | public class DomainEntity 8 | { 9 | [Key] 10 | public long Id { get; set; } 11 | 12 | public string FirstName { get; set; } 13 | 14 | public string LastName { 
get; set; } 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /docs/examples/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Examples 4 | nav_order: 3 5 | has_children: true 6 | --- 7 | # Examples 8 | 9 | *Bulk Writer* is built on the concept of pull-based data streaming, and the code to implement this concept isn't really all that complicated. However, even simple implementations can enable very complex scenarios. Here are some code samples to demonstrate implementing ETL pipelines of various complexity using the *Bulk Writer* library. 10 | -------------------------------------------------------------------------------- /docs/404.html: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | --- 4 | 5 | 18 | 19 |
20 |

404

21 | 22 |

Page not found :(

23 |

The requested page could not be found.

24 |
25 | -------------------------------------------------------------------------------- /src/BulkWriter/Pipeline/Transforms/IPivot.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | 3 | namespace BulkWriter.Pipeline.Transforms 4 | { 5 | public interface IPivot 6 | { 7 | /// 8 | /// Pivots a single input object out to multiple output objects 9 | /// 10 | /// Input object to pivot 11 | /// Enumerable with zero or more output objects 12 | IEnumerable Pivot(TIn input); 13 | } 14 | } -------------------------------------------------------------------------------- /src/BulkWriter/Pipeline/Transforms/IAggregator.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | 3 | namespace BulkWriter.Pipeline.Transforms 4 | { 5 | public interface IAggregator 6 | { 7 | /// 8 | /// Aggregates multiple input objects down to a single output 9 | /// 10 | /// Set of input objects to aggregate 11 | /// Aggregated result object 12 | TOut Aggregate(IEnumerable input); 13 | } 14 | } -------------------------------------------------------------------------------- /src/BulkWriter.Benchmark/BulkWriter.Benchmark.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | net10.0 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /src/BulkWriter/Internal/PropertyMappingExtensions.cs: -------------------------------------------------------------------------------- 1 | using Microsoft.Data.SqlClient; 2 | 3 | namespace BulkWriter.Internal 4 | { 5 | internal static class PropertyMappingExtensions 6 | { 7 | public static SqlBulkCopyColumnMapping ToColumnMapping(this PropertyMapping self) => 8 | !string.IsNullOrWhiteSpace(self.Destination.ColumnName) 9 | ? 
new SqlBulkCopyColumnMapping(self.Source.Ordinal, self.Destination.ColumnName) 10 | : new SqlBulkCopyColumnMapping(self.Source.Ordinal, self.Destination.ColumnOrdinal); 11 | } 12 | } -------------------------------------------------------------------------------- /Push.ps1: -------------------------------------------------------------------------------- 1 | $scriptName = $MyInvocation.MyCommand.Name 2 | $artifacts = "./artifacts" 3 | 4 | if ([string]::IsNullOrEmpty($Env:NUGET_API_KEY)) { 5 | Write-Host "${scriptName}: NUGET_API_KEY is empty or not set. Skipped pushing package(s)." 6 | } else { 7 | Get-ChildItem $artifacts -Filter "*.nupkg" | ForEach-Object { 8 | Write-Host "$($scriptName): Pushing $($_.Name)" 9 | dotnet nuget push $_ --source $Env:NUGET_URL --api-key $Env:NUGET_API_KEY 10 | if ($lastexitcode -ne 0) { 11 | throw ("Exec: " + $errorMessage) 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/BulkWriter.Benchmark/Benchmarks/BenchmarkBaseClass.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | using BenchmarkDotNet.Attributes; 3 | 4 | namespace BulkWriter.Benchmark.Benchmarks 5 | { 6 | public class BenchmarkBaseClass 7 | { 8 | [GlobalSetup] 9 | public virtual void GlobalSetup() 10 | { 11 | using var sqlConnection = DbHelpers.OpenSqlConnection(); 12 | DbHelpers.TruncateTable(sqlConnection); 13 | } 14 | 15 | protected IEnumerable GetTestRecords() 16 | { 17 | return DataGenerationHelpers.GetDomainEntities(1000); 18 | } 19 | } 20 | } -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Overview 4 | has_children: true 5 | nav_order: 1 6 | --- 7 | # Bulk Writer 8 | 9 | A small library which facilitates building fast, memory-efficient, pull-based ETL processes in 
C#. 10 | 11 | *Bulk Writer* provides a wrapper over `Microsoft.Data.SqlClient.SqlBulkCopy` to enable streaming of records to a target data store using an `IEnumerable` or `IAsyncEnumerable` (.NET Standard 2.1 or later) as the data source. This approach keeps memory overhead low when loading large volumes of data, while taking advantage of high-speed bulk inserts to the database provided by `SqlBulkCopy`. Helper classes provide means to apply this model manually, and to build custom ETL pipelines to transform data on its way into the data store. 12 | -------------------------------------------------------------------------------- /src/BulkWriter/Pipeline/IEtlPipeline.cs: -------------------------------------------------------------------------------- 1 | using System.Threading; 2 | using System.Threading.Tasks; 3 | 4 | namespace BulkWriter.Pipeline 5 | { 6 | public interface IEtlPipeline 7 | { 8 | /// 9 | /// Executes the previously configured pipeline 10 | /// 11 | /// Awaitable Task for the running pipeline 12 | Task ExecuteAsync(); 13 | 14 | /// 15 | /// Executes the previously configured pipeline in a cancellable fashion 16 | /// 17 | /// Token for cancelling the pipeline mid-run 18 | /// Awaitable Task for the running pipeline 19 | Task ExecuteAsync(CancellationToken cancellationToken); 20 | } 21 | } -------------------------------------------------------------------------------- /src/BulkWriter/Properties/AssemblyInfo.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Reflection; 3 | using System.Runtime.InteropServices; 4 | 5 | // General Information about an assembly is controlled through the following 6 | // set of attributes. Change these attribute values to modify the information 7 | // associated with an assembly. 
8 | [assembly: AssemblyTrademark("")] 9 | [assembly: AssemblyCulture("")] 10 | 11 | // Setting ComVisible to false makes the types in this assembly not visible 12 | // to COM components. If you need to access a type in this assembly from 13 | // COM, set the ComVisible attribute to true on that type. 14 | [assembly: ComVisible(false)] 15 | 16 | // The following GUID is for the ID of the typelib if this project is exposed to COM 17 | [assembly: Guid("d4478ca6-9e19-4372-8d21-5f90f05212c0")] 18 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout 15 | uses: actions/checkout@v2 16 | with: 17 | fetch-depth: 0 18 | - name: Setup dotnet 19 | uses: actions/setup-dotnet@v4 20 | with: 21 | dotnet-version: | 22 | 8.0.x 23 | 9.0.x 24 | 10.0.x 25 | - name: Setup required dependencies 26 | run: ./setup.ps1 27 | shell: pwsh 28 | - name: Build and Test 29 | run: ./Build.ps1 30 | shell: pwsh 31 | - name: Artifacts 32 | uses: actions/upload-artifact@v4 33 | with: 34 | name: artifacts 35 | path: artifacts/**/* -------------------------------------------------------------------------------- /src/BulkWriter/Pipeline/Internal/BulkWriterEtlPipelineStep.cs: -------------------------------------------------------------------------------- 1 | using System.Threading; 2 | using System.Threading.Tasks; 3 | 4 | namespace BulkWriter.Pipeline.Internal 5 | { 6 | internal class BulkWriterEtlPipelineStep : EtlPipelineStep 7 | { 8 | private readonly IBulkWriter _bulkWriter; 9 | 10 | public BulkWriterEtlPipelineStep(EtlPipelineStepBase previousStep, IBulkWriter bulkWriter) : base(previousStep) 11 | { 12 | _bulkWriter = bulkWriter; 13 | } 14 | 15 | protected override async Task 
RunCore(CancellationToken cancellationToken) 16 | { 17 | var enumerable = InputCollection.GetConsumingEnumerable(cancellationToken); 18 | await _bulkWriter.WriteToDatabaseAsync(enumerable); 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/BulkWriter.Benchmark/DataGenerationHelpers.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | using System.Threading; 3 | 4 | namespace BulkWriter.Benchmark 5 | { 6 | internal static class DataGenerationHelpers 7 | { 8 | private static long _idCounter = 0; 9 | 10 | public static IEnumerable GetDomainEntities(int count) 11 | { 12 | for (var i = 0; i < count; i++) 13 | { 14 | yield return new DomainEntity 15 | { 16 | Id = GetNextId(), 17 | FirstName = $"Bob-{i}", 18 | LastName = $"Smith-{i}" 19 | }; 20 | } 21 | } 22 | 23 | private static long GetNextId() 24 | { 25 | return Interlocked.Increment(ref _idCounter); 26 | } 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | push: 5 | tags: 6 | - '*.*.*' 7 | jobs: 8 | build: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Checkout 12 | uses: actions/checkout@v2 13 | with: 14 | fetch-depth: 0 15 | - name: Setup dotnet 16 | uses: actions/setup-dotnet@v4 17 | with: 18 | dotnet-version: | 19 | 8.0.x 20 | 9.0.x 21 | 10.0.x 22 | - name: Build and Test 23 | run: ./Build.ps1 24 | shell: pwsh 25 | - name: Push to NuGet 26 | env: 27 | NUGET_URL: https://api.nuget.org/v3/index.json 28 | NUGET_API_KEY: ${{ secrets.NUGET_API_KEY }} 29 | run: ./Push.ps1 30 | shell: pwsh 31 | - name: Artifacts 32 | uses: actions/upload-artifact@v4 33 | with: 34 | name: artifacts 35 | path: artifacts/**/* -------------------------------------------------------------------------------- 
/src/BulkWriter.Tests/BulkWriter.Tests.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | net10.0 4 | BulkWriter.Tests 5 | false 6 | 7 | 8 | 9 | 10 | 11 | 12 | all 13 | runtime; build; native; contentfiles; analyzers; buildtransitive 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /src/BulkWriter/Pipeline/Internal/EtlPipelineContext.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using Microsoft.Extensions.Logging; 3 | 4 | namespace BulkWriter.Pipeline.Internal 5 | { 6 | internal class EtlPipelineContext 7 | { 8 | private readonly Action _addStepToPipelineAction; 9 | 10 | public EtlPipelineContext(IEtlPipeline etlPipeline, Action addStepToPipelineAction) 11 | { 12 | Pipeline = etlPipeline; 13 | _addStepToPipelineAction = addStepToPipelineAction; 14 | } 15 | 16 | public IEtlPipeline Pipeline { get; } 17 | public ILoggerFactory LoggerFactory { get; set; } 18 | public int TotalSteps { get; private set; } 19 | 20 | public void AddStep(IEtlPipelineStep step) 21 | { 22 | ++TotalSteps; 23 | _addStepToPipelineAction(step); 24 | } 25 | } 26 | } -------------------------------------------------------------------------------- /src/BulkWriter/Pipeline/Internal/StartEtlPipelineStep.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | using System.Threading; 3 | using System.Threading.Tasks; 4 | 5 | namespace BulkWriter.Pipeline.Internal 6 | { 7 | internal class StartEtlPipelineStep : EtlPipelineStep 8 | { 9 | private readonly IEnumerable _inputEnumerable; 10 | 11 | public StartEtlPipelineStep(EtlPipelineContext pipelineContext, IEnumerable inputEnumerable) : base(pipelineContext) 12 | { 13 | _inputEnumerable = inputEnumerable; 14 | } 15 | 16 | protected override Task RunCore(CancellationToken cancellationToken) 17 | { 18 | foreach (var 
item in _inputEnumerable) 19 | { 20 | OutputCollection.Add(item, cancellationToken); 21 | } 22 | 23 | return Task.CompletedTask; 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /.github/workflows/devskim.yml: -------------------------------------------------------------------------------- 1 | # This workflow uses actions that are not certified by GitHub. 2 | # They are provided by a third-party and are governed by 3 | # separate terms of service, privacy policy, and support 4 | # documentation. 5 | 6 | name: DevSkim 7 | 8 | on: 9 | push: 10 | branches: [ "main" ] 11 | pull_request: 12 | branches: [ "main" ] 13 | schedule: 14 | - cron: '25 3 * * 0' 15 | 16 | jobs: 17 | lint: 18 | name: DevSkim 19 | runs-on: ubuntu-20.04 20 | permissions: 21 | actions: read 22 | contents: read 23 | security-events: write 24 | steps: 25 | - name: Checkout code 26 | uses: actions/checkout@v3 27 | 28 | - name: Run DevSkim scanner 29 | uses: microsoft/DevSkim-Action@v1 30 | 31 | - name: Upload DevSkim scan results to GitHub Security tab 32 | uses: github/codeql-action/upload-sarif@v2 33 | with: 34 | sarif_file: devskim-results.sarif 35 | -------------------------------------------------------------------------------- /src/BulkWriter/Pipeline/Internal/AsyncStartEtlPipelineStep.cs: -------------------------------------------------------------------------------- 1 | #if NETSTANDARD2_1_OR_GREATER || NET6_0_OR_GREATER 2 | using System.Collections.Generic; 3 | using System.Threading; 4 | using System.Threading.Tasks; 5 | 6 | namespace BulkWriter.Pipeline.Internal 7 | { 8 | internal class AsyncStartEtlPipelineStep : EtlPipelineStep 9 | { 10 | private readonly IAsyncEnumerable _inputEnumerable; 11 | 12 | public AsyncStartEtlPipelineStep(EtlPipelineContext pipelineContext, IAsyncEnumerable inputEnumerable) : base(pipelineContext) 13 | { 14 | _inputEnumerable = inputEnumerable; 15 | } 16 | 17 | protected override async Task 
RunCore(CancellationToken cancellationToken) 18 | { 19 | await foreach (var item in _inputEnumerable.WithCancellation(cancellationToken)) 20 | { 21 | OutputCollection.Add(item, cancellationToken); 22 | } 23 | } 24 | } 25 | } 26 | #endif 27 | -------------------------------------------------------------------------------- /src/BulkWriter/Pipeline/Internal/AggregateEtlPipelineStep.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Threading; 4 | using System.Threading.Tasks; 5 | 6 | namespace BulkWriter.Pipeline.Internal 7 | { 8 | internal class AggregateEtlPipelineStep : EtlPipelineStep 9 | { 10 | private readonly Func, TOut> _aggregationFunc; 11 | 12 | public AggregateEtlPipelineStep(EtlPipelineStepBase previousStep, Func, TOut> aggregationFunc) : base(previousStep) 13 | { 14 | _aggregationFunc = aggregationFunc ?? throw new ArgumentNullException(nameof(aggregationFunc)); 15 | } 16 | 17 | protected override Task RunCore(CancellationToken cancellationToken) 18 | { 19 | var enumerable = InputCollection.GetConsumingEnumerable(cancellationToken); 20 | 21 | var result = _aggregationFunc(enumerable); 22 | OutputCollection.Add(result, cancellationToken); 23 | 24 | return Task.CompletedTask; 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/BulkWriter/Pipeline/Internal/ProjectEtlPipelineStep.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Threading; 3 | using System.Threading.Tasks; 4 | 5 | namespace BulkWriter.Pipeline.Internal 6 | { 7 | internal class ProjectEtlPipelineStep : EtlPipelineStep 8 | { 9 | private readonly Func _projectionFunc; 10 | 11 | public ProjectEtlPipelineStep(EtlPipelineStepBase previousStep, Func projectionFunc) : base(previousStep) 12 | { 13 | _projectionFunc = projectionFunc ?? 
throw new ArgumentNullException(nameof(projectionFunc)); 14 | } 15 | 16 | protected override Task RunCore(CancellationToken cancellationToken) 17 | { 18 | var enumerable = InputCollection.GetConsumingEnumerable(cancellationToken); 19 | 20 | foreach (var item in enumerable) 21 | { 22 | var result = _projectionFunc(item); 23 | OutputCollection.Add(result, cancellationToken); 24 | } 25 | 26 | return Task.CompletedTask; 27 | } 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Headspring 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /src/BulkWriter/Pipeline/Internal/TransformEtlPipelineStep.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Threading; 3 | using System.Threading.Tasks; 4 | 5 | namespace BulkWriter.Pipeline.Internal 6 | { 7 | internal class TransformEtlPipelineStep : EtlPipelineStep 8 | { 9 | private readonly Action[] _transformActions; 10 | 11 | public TransformEtlPipelineStep(EtlPipelineStepBase previousStep, params Action[] transformActions) : base(previousStep) 12 | { 13 | _transformActions = transformActions; 14 | } 15 | 16 | protected override Task RunCore(CancellationToken cancellationToken) 17 | { 18 | var enumerable = InputCollection.GetConsumingEnumerable(cancellationToken); 19 | 20 | foreach (var item in enumerable) 21 | { 22 | foreach (var transformAction in _transformActions) 23 | { 24 | transformAction(item); 25 | } 26 | 27 | OutputCollection.Add(item, cancellationToken); 28 | } 29 | 30 | return Task.CompletedTask; 31 | } 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/BulkWriter/Pipeline/Internal/PivotEtlPipelineStep.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Threading; 4 | using System.Threading.Tasks; 5 | 6 | namespace BulkWriter.Pipeline.Internal 7 | { 8 | internal class PivotEtlPipelineStep : EtlPipelineStep 9 | { 10 | private readonly Func> _pivotFunc; 11 | 12 | public PivotEtlPipelineStep(EtlPipelineStepBase previousStep, Func> pivotFunc) : base(previousStep) 13 | { 14 | _pivotFunc = pivotFunc ?? 
throw new ArgumentNullException(nameof(pivotFunc)); 14 | } 15 | 16 | protected override Task RunCore(CancellationToken cancellationToken) 17 | { 18 | var enumerable = InputCollection.GetConsumingEnumerable(cancellationToken); 19 | 20 | foreach (var item in enumerable) 21 | { 22 | var outputs = _pivotFunc(item); 23 | foreach (var output in outputs) 24 | { 25 | OutputCollection.Add(output, cancellationToken); 26 | } 27 | } 28 | 29 | return Task.CompletedTask; 30 | } 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /Build.ps1: -------------------------------------------------------------------------------- 1 | # Taken from psake https://github.com/psake/psake 2 | 3 | <# 4 | .SYNOPSIS 5 | This is a helper function that runs a scriptblock and checks the PS variable $lastexitcode 6 | to see if an error occurred. If an error is detected then an exception is thrown. 7 | This function allows you to run command-line programs without having to 8 | explicitly check the $lastexitcode variable. 9 | .EXAMPLE 10 | exec { svn info $repository_trunk } "Error executing SVN. 
Please verify SVN command-line client is installed" 11 | #> 12 | function Exec 13 | { 14 | [CmdletBinding()] 15 | param( 16 | [Parameter(Position=0,Mandatory=1)][scriptblock]$cmd, 17 | [Parameter(Position=1,Mandatory=0)][string]$errorMessage = ($msgs.error_bad_command -f $cmd) 18 | ) 19 | & $cmd 20 | if ($lastexitcode -ne 0) { 21 | throw ("Exec: " + $errorMessage) 22 | } 23 | } 24 | 25 | $artifacts = ".\artifacts" 26 | 27 | if(Test-Path $artifacts) { Remove-Item $artifacts -Force -Recurse } 28 | 29 | exec { & dotnet clean -c Release } 30 | 31 | exec { & dotnet build -c Release } 32 | 33 | exec { & dotnet test -c Release --no-build -l trx --verbosity=normal } 34 | 35 | exec { & dotnet pack .\src\BulkWriter\BulkWriter.csproj -c Release -o $artifacts --no-build } 36 | 37 | -------------------------------------------------------------------------------- /src/BulkWriter/IBulkWriter.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Threading; 4 | using System.Threading.Tasks; 5 | 6 | namespace BulkWriter 7 | { 8 | public interface IBulkWriter : IDisposable 9 | { 10 | /// 11 | /// Bulk loads an input enumerable of type 12 | /// 13 | /// Items to load to the database 14 | void WriteToDatabase(IEnumerable items); 15 | 16 | /// 17 | /// Bulk loads an input enumerable of type 18 | /// 19 | /// Items to load to the database 20 | /// Optional cancellation token 21 | /// Awaitable task for writing to the database 22 | Task WriteToDatabaseAsync(IEnumerable items, CancellationToken cancellationToken = default); 23 | 24 | #if NETSTANDARD2_1_OR_GREATER || NET6_0_OR_GREATER 25 | 26 | /// 27 | /// Bulk loads an input async enumerable of type 28 | /// 29 | /// Items to async load to the database 30 | /// Optional cancellation token 31 | /// Awaitable task for writing to the database 32 | Task WriteToDatabaseAsync(IAsyncEnumerable items, CancellationToken cancellationToken = 
default); 33 | #endif 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /docs/motivation.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Motivation 4 | parent: Overview 5 | nav_order: 2 6 | --- 7 | # Motivation 8 | 9 | We've all had reasons to write ETL jobs in C# rather than with Integration Services in SQL Server. Sometimes it's because your ETL's transform logic is easier to reason about in C#, sometimes we want to utilize .NET's rich library ecosystem, but for whatever reason, it's a perfectly acceptable way to do things. 10 | 11 | ## Challenge 12 | 13 | When writing an ETL process in C#, we use the tools available to us like NHibernate, Entity Framework or Dapper to read from and write to databases. These tools help us stream _from_ source data pretty easily, but unfortunately, they don't make it easy to stream data _to_ our target data stores. Instead, they typically leave us writing to target data stores with `INSERT` statements, which is not performant for transforms that generate very large data sets. 14 | 15 | > **What we need for transforms that generate very large data sets is a technique to stream data into a target data store, just as we're able to stream from source data.** 16 | 17 | Such a technique would allow writing to target data stores as fast as our transforms and hardware will allow, compared to relying on our ORM to generate `INSERT` statements. In most cases, it would also use significantly less memory. 18 | 19 | ## Solution 20 | 21 | This library and the guidance that follows show how to use `SqlBulkCopy`, `IEnumerable` and `IDataReader` to enable this kind of streaming technique, that is, to stream from a data source and to stream into a data store, with our C# ETLs. We'll also cover how to change your "push"-based transforms that use `INSERT` statements to "pull"-based transforms that use `IEnumerable`. 
22 | -------------------------------------------------------------------------------- /.github/workflows/triage-issues.yml: -------------------------------------------------------------------------------- 1 | # https://github.com/actions/stale 2 | 3 | name: "Stale issue & PR handler" 4 | 5 | on: 6 | workflow_dispatch: 7 | schedule: 8 | - cron: "0 12 * * *" 9 | 10 | env: 11 | ISSUES_DAYS_BEFORE_CLOSE: 14 12 | PR_DAYS_BEFORE_CLOSE: 14 13 | ISSUES_DAYS_BEFORE_STALE: 60 14 | PR_DAYS_BEFORE_STALE: 28 15 | 16 | jobs: 17 | issues: 18 | name: "Close stale issues and PRs" 19 | runs-on: "ubuntu-latest" 20 | steps: 21 | - uses: "actions/stale@v6.0.0" 22 | with: 23 | stale-issue-label: "stale" 24 | stale-issue-message: "This issue is stale because it has been open ${{ env.ISSUES_DAYS_BEFORE_STALE }} days with no activity. Remove stale label or comment or this will be closed in ${{ env.ISSUES_DAYS_BEFORE_CLOSE }} days." 25 | close-issue-message: 'This issue was closed because it has been stalled for ${{ env.ISSUES_DAYS_BEFORE_CLOSE }} days with no activity.' 26 | days-before-close: "${{ env.ISSUES_DAYS_BEFORE_CLOSE }}" 27 | days-before-stale: "${{ env.ISSUES_DAYS_BEFORE_STALE }}" 28 | exempt-issue-assignees: true 29 | exempt-issue-labels: 'awaiting-approval,work-in-progress,up-for-grabs' 30 | stale-pr-label: "stale" 31 | stale-pr-message: 'This PR is stale because it has been open ${{ env.PR_DAYS_BEFORE_STALE }} days with no activity. Remove stale label or comment or this will be closed in ${{ env.PR_DAYS_BEFORE_CLOSE }} days.' 32 | close-pr-message: 'This PR was closed because it has been stalled for ${{ env.PR_DAYS_BEFORE_CLOSE }} days with no activity.' 
33 | days-before-pr-close: "${{ env.PR_DAYS_BEFORE_CLOSE }}" 34 | days-before-pr-stale: "${{ env.PR_DAYS_BEFORE_STALE }}" 35 | exempt-all-pr-assignees: true 36 | exempt-pr-labels: 'awaiting-approval,work-in-progress' -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | # Welcome to Jekyll! 2 | # 3 | # This config file is meant for settings that affect your whole blog, values 4 | # which you are expected to set up once and rarely edit after that. If you find 5 | # yourself editing this file very often, consider using Jekyll's data files 6 | # feature for the data you need to update frequently. 7 | # 8 | # For technical reasons, this file is *NOT* reloaded automatically when you use 9 | # 'bundle exec jekyll serve'. If you change this file, please restart the server process. 10 | 11 | # Site settings 12 | # These are used to personalize your new site. If you look in the HTML files, 13 | # you will see them accessed via {{ site.title }}, {{ site.email }}, and so on. 14 | # You can create any custom variable you would like, and they will be accessible 15 | # in the templates via {{ site.myvariable }}. 16 | title: "Bulk Writer" 17 | description: "Bulk Writer is a small library which facilitates building fast, memory-efficient, pull-based ETL processes in C#." 18 | baseurl: "/bulk-writer" # the subpath of your site, e.g. /blog 19 | url: "https://jbogard.github.io" # the base hostname & protocol for your site, e.g. http://example.com 20 | 21 | # Build settings 22 | markdown: kramdown 23 | remote_theme: pmarsceill/just-the-docs 24 | 25 | # Just the Docs Settings 26 | search_enabled: true 27 | footer_content: "Copyright © 2020 Headspring." 28 | logo: "assets/images/logo.png" 29 | 30 | defaults: 31 | - scope: 32 | path: "assets/images" 33 | values: 34 | image: true 35 | 36 | # Exclude from processing. 
37 | # The following items will not be processed, by default. Create a custom list 38 | # to override the default setting. 39 | # exclude: 40 | # - Gemfile 41 | # - Gemfile.lock 42 | # - node_modules 43 | # - vendor/bundle/ 44 | # - vendor/cache/ 45 | # - vendor/gems/ 46 | # - vendor/ruby/ 47 | -------------------------------------------------------------------------------- /src/BulkWriter/Internal/PropertyInfoExtensions.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Reflection; 4 | using BulkWriter.Properties; 5 | using static System.Linq.Expressions.Expression; 6 | 7 | namespace BulkWriter.Internal 8 | { 9 | internal static class PropertyInfoExtensions 10 | { 11 | private static readonly Dictionary CachedGetters = new Dictionary(); 12 | 13 | public static GetPropertyValueHandler GetValueGetter(this PropertyInfo propertyInfo) 14 | { 15 | if (null == propertyInfo) 16 | { 17 | throw new ArgumentNullException(nameof(propertyInfo)); 18 | } 19 | 20 | if (null == propertyInfo.DeclaringType) 21 | { 22 | throw new ArgumentException(Resources.PropertyInfoExtensions_PropertyNotDeclaredOnType, nameof(propertyInfo)); 23 | } 24 | 25 | GetPropertyValueHandler getter; 26 | 27 | lock (CachedGetters) 28 | { 29 | if (!CachedGetters.TryGetValue(propertyInfo, out getter)) 30 | { 31 | var instance = Parameter(typeof(object), "instance"); 32 | var convertedInstance = Convert(instance, propertyInfo.DeclaringType); 33 | var propertyCall = Property(convertedInstance, propertyInfo); 34 | var convertedPropertyValue = Convert(propertyCall, typeof(object)); 35 | 36 | var lambda = Lambda(convertedPropertyValue, instance); 37 | var compiled = lambda.Compile(); 38 | 39 | CachedGetters[propertyInfo] = getter = compiled; 40 | } 41 | } 42 | 43 | return getter; 44 | } 45 | } 46 | } -------------------------------------------------------------------------------- 
/src/BulkWriter.Tests/BulkWriterAsyncTests.cs: -------------------------------------------------------------------------------- 1 | using System.Linq; 2 | using System.Threading.Tasks; 3 | using Xunit; 4 | 5 | namespace BulkWriter.Tests 6 | { 7 | [Collection(nameof(DbContainerFixture))] 8 | public class BulkWriterAsyncTests 9 | { 10 | private readonly string _connectionString; 11 | private readonly string _tableName = nameof(BulkWriterAsyncTestsMyTestClass); 12 | 13 | public class BulkWriterAsyncTestsMyTestClass 14 | { 15 | public int Id { get; set; } 16 | 17 | public string Name { get; set; } 18 | } 19 | 20 | private readonly DbContainerFixture _fixture; 21 | 22 | public BulkWriterAsyncTests(DbContainerFixture fixture) 23 | { 24 | _fixture = fixture; 25 | _connectionString = fixture.TestConnectionString; 26 | _fixture.ExecuteNonQuery(_connectionString, $"DROP TABLE IF EXISTS [dbo].[{_tableName}]"); 27 | 28 | _fixture.ExecuteNonQuery(_connectionString, 29 | "CREATE TABLE [dbo].[" + _tableName + "](" + 30 | "[Id] [int] IDENTITY(1,1) NOT NULL," + 31 | "[Name] [nvarchar](50) NULL," + 32 | "CONSTRAINT [PK_" + _tableName + "] PRIMARY KEY CLUSTERED ([Id] ASC)" + 33 | ")"); 34 | } 35 | 36 | [Fact] 37 | public async Task CanWriteSync() 38 | { 39 | var writer = new BulkWriter(_connectionString); 40 | 41 | var items = Enumerable.Range(1, 1000).Select(i => new BulkWriterAsyncTestsMyTestClass { Id = i, Name = "Bob"}); 42 | 43 | await writer.WriteToDatabaseAsync(items); 44 | 45 | var count = (int) await _fixture.ExecuteScalar(_connectionString, $"SELECT COUNT(1) FROM {_tableName}"); 46 | 47 | Assert.Equal(1000, count); 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/BulkWriter.Tests/BulkWriterAsyncEnumerableTests.cs: -------------------------------------------------------------------------------- 1 | using System.Linq; 2 | using System.Threading.Tasks; 3 | using Xunit; 4 | 5 | namespace BulkWriter.Tests 6 | { 7 | 
[Collection(nameof(DbContainerFixture))] 8 | public class BulkWriterAsyncEnumerableTests 9 | { 10 | private readonly string _connectionString; 11 | private readonly string _tableName = nameof(BulkWriterAsyncTestsMyTestClass); 12 | 13 | public class BulkWriterAsyncTestsMyTestClass 14 | { 15 | public int Id { get; set; } 16 | 17 | public string Name { get; set; } 18 | } 19 | 20 | private readonly DbContainerFixture _fixture; 21 | 22 | public BulkWriterAsyncEnumerableTests(DbContainerFixture fixture) 23 | { 24 | _fixture = fixture; 25 | _connectionString = fixture.TestConnectionString; 26 | 27 | _fixture.ExecuteNonQuery(_connectionString, $"DROP TABLE IF EXISTS [dbo].[{_tableName}]"); 28 | 29 | _fixture.ExecuteNonQuery(_connectionString, 30 | "CREATE TABLE [dbo].[" + _tableName + "](" + 31 | "[Id] [int] IDENTITY(1,1) NOT NULL," + 32 | "[Name] [nvarchar](50) NULL," + 33 | "CONSTRAINT [PK_" + _tableName + "] PRIMARY KEY CLUSTERED ([Id] ASC)" + 34 | ")"); 35 | } 36 | 37 | [Fact] 38 | public async Task CanWriteSync() 39 | { 40 | var writer = new BulkWriter(_connectionString); 41 | 42 | var items = Enumerable 43 | .Range(1, 1000) 44 | .Select(i => new BulkWriterAsyncTestsMyTestClass { Id = i, Name = "Bob"}) 45 | .ToAsyncEnumerable(); 46 | 47 | await writer.WriteToDatabaseAsync(items); 48 | 49 | var count = (int) await _fixture.ExecuteScalar(_connectionString, $"SELECT COUNT(1) FROM {_tableName}"); 50 | 51 | Assert.Equal(1000, count); 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/BulkWriter/BulkWriter.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | Headspring 4 | net8.0;net9.0;net10.0 5 | LINQ to SQL bulk copy 6 | LINQ to SQL bulk copy 7 | BulkWriter 8 | BulkWriter 9 | https://github.com/jbogard/bulk-writer 10 | LICENSE 11 | git 12 | https://github.com/jbogard/bulk-writer 13 | logo.png 14 | v 15 | true 16 | true 17 | snupkg 18 | true 19 | true 20 | 21 | 22 
| 23 | $(TargetFrameworks);net462 24 | 25 | 26 | 27 | 28 | 29 | 30 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Bulk Writer 2 | 3 | Bulk Writer is a small library which facilitates building fast, pull-based ETL processes in C# using `SqlBulkCopy`. 4 | 5 | ## Documentation 6 | 7 | Documentation can be found at https://jbogard.github.io/bulk-writer/ 8 | 9 | ## Installation 10 | 11 | [Bulk Writer](https://www.nuget.org/packages/BulkWriter/) is available on NuGet and can be installed using the package manager console: 12 | 13 | ``` 14 | PM> Install-Package BulkWriter 15 | ``` 16 | 17 | ## Usage 18 | 19 | ```csharp 20 | var q = 21 | from entity in GetAllEntities() 22 | where entity.IsActive && SomeOtherPredicate(entity) 23 | from zipCode in GetAllZipCodes() 24 | where zipCode.IsInContiguousStates && SomeOtherPredicate(zipCode) 25 | let distance = GetDistance(entity, zipCode) 26 | let arbitraryData = CreateSomeArbitraryData(entity, zipCode) 27 | where distance > 0 28 | select new EntityToZipCodeDistance { 29 | EntityId = entity.Id, 30 | ZipCode = zipCode.Zip, 31 | Distance = distance, 32 | ArbitraryData = arbitraryData 33 | }; 34 | 35 | using (var bulkWriter = new BulkWriter(connectionString)) 36 | { 37 | bulkWriter.WriteToDatabase(q); 38 | } 39 | // or async 40 | 41 | using (var bulkWriter = new BulkWriter(connectionString)) 42 | { 43 | await bulkWriter.WriteToDatabaseAsync(q); 44 | } 45 | 46 | // or async enumerables with .NET Standard 2.1 or later 47 | var u = q.ToAsyncEnumerable(); // 48 | 49 | using (var bulkWriter = new BulkWriter(connectionString)) 50 | { 51 | await bulkWriter.WriteToDatabaseAsync(u); 52 | } 53 | ``` 54 | 55 | ## Building Locally 56 | 57 | Run the following command once to setup your environment. 
58 | ``` 59 | PS> .\setup.ps1 60 | ``` 61 | 62 | Run the command below to build and test the project. 63 | 64 | ``` 65 | PS> .\psake.cmd 66 | ``` 67 | 68 | ## Contributing 69 | 70 | Pull Requests are welcome. If you identify a bug or would like to make a feature request feel free to submit a GitHub Issue to start a discussion. 71 | -------------------------------------------------------------------------------- /docs/examples/advanced-pipelining.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Advanced Pipelining 4 | nav_order: 3 5 | parent: Examples 6 | --- 7 | # Advanced Pipelining 8 | 9 | To implement an advanced pipeline as described in the [Pipelining Overview](../pipelining.md#advanced-pipelining), you can make use of the `EtlPipeline` [class](../features/etlpipeline.md). The main difference between this example and the basic [Pipelining](./pipelining.md) example is that the `EtlPipeline` class buffers output from each step in a producer/consumer collection. If any of your steps implement long-running operations, this buffering helps ensure downstream steps in the pipeline can stay busy (assuming they have work in their input buffers). 
10 | 11 | ```csharp 12 | public class MyEntity 13 | { 14 | public int Id { get; set; } 15 | public string Name { get; set; } 16 | } 17 | 18 | public class MyOtherEntity 19 | { 20 | public int Id { get; set; } 21 | public string FirstName { get; set; } 22 | public string LastName { get; set; } 23 | } 24 | 25 | public class BobFromIdPivot : IPivot 26 | { 27 | public IEnumerable Pivot(int i) 28 | { 29 | for (var j = 1; j <= i; j++) 30 | { 31 | yield return new MyEntity { Id = j, Name = $"Bob {j}" }; 32 | } 33 | } 34 | } 35 | 36 | using (var writer = new BulkWriter()) 37 | { 38 | var items = Enumerable.Range(1, 1000000).Select(i => new MyEntity { Id = i, Name = "Carol" }); 39 | var pipeline = EtlPipeline 40 | .StartWith(items) 41 | .Aggregate(f => f.Max(c => c.Id)) 42 | .Pivot(new BobFromIdPivot()) 43 | .Project(i => 44 | { 45 | var nameParts = i.Name.Split(' '); 46 | return new MyOtherEntity {Id = i.Id, FirstName = nameParts[0], LastName = nameParts[1] }; 47 | }) 48 | .TransformInPlace(i => 49 | { 50 | i.Id -= 1; 51 | i.FirstName = "Alice"; 52 | i.LastName = $"{i.Id}"; 53 | }) 54 | .LogWith(loggerFactory) 55 | .WriteTo(writer); 56 | 57 | await pipeline.ExecuteAsync(); 58 | } 59 | ``` 60 | -------------------------------------------------------------------------------- /src/BulkWriter/Internal/AsyncEnumerableDataReader.cs: -------------------------------------------------------------------------------- 1 | #if NETSTANDARD2_1_OR_GREATER || NET6_0_OR_GREATER 2 | using System; 3 | using System.Collections; 4 | using System.Collections.Generic; 5 | using System.Data.Common; 6 | using System.Linq; 7 | using System.Runtime.CompilerServices; 8 | using BulkWriter.Properties; 9 | using System.Threading; 10 | using System.Threading.Tasks; 11 | 12 | [assembly: InternalsVisibleTo("BulkWriter.Tests")] 13 | namespace BulkWriter.Internal 14 | { 15 | internal class AsyncEnumerableDataReader : EnumerableDataReaderBase 16 | { 17 | private readonly IAsyncEnumerable _items; 18 | 19 | 
private bool _disposed; 20 | private IAsyncEnumerator _enumerator; 21 | 22 | public AsyncEnumerableDataReader(IAsyncEnumerable items, IEnumerable propertyMappings) 23 | : base(propertyMappings) 24 | { 25 | _items = items ?? throw new ArgumentNullException(nameof(items)); 26 | } 27 | 28 | public override TResult Current 29 | { 30 | get 31 | { 32 | EnsureNotDisposed(); 33 | return null != _enumerator ? _enumerator.Current : default(TResult); 34 | } 35 | } 36 | 37 | public override bool Read() => throw new NotImplementedException(); 38 | 39 | public override Task ReadAsync(CancellationToken cancellationToken) 40 | { 41 | _enumerator ??= _items.GetAsyncEnumerator(cancellationToken); 42 | 43 | return _enumerator.MoveNextAsync().AsTask(); 44 | } 45 | 46 | 47 | public override async ValueTask DisposeAsync() 48 | { 49 | if (_enumerator != null) 50 | { 51 | await _enumerator.DisposeAsync(); 52 | _enumerator = null; 53 | } 54 | _disposed = true; 55 | await base.DisposeAsync(); 56 | } 57 | 58 | protected override void EnsureNotDisposed() 59 | { 60 | if (_disposed) 61 | { 62 | throw new ObjectDisposedException("AsyncEnumerableDataReader"); 63 | } 64 | } 65 | } 66 | } 67 | #endif 68 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Build Folders (you can keep bin if you'd like, to store dlls and pdbs) 2 | [Bb]in/ 3 | [Oo]bj/ 4 | 5 | # mstest test results 6 | TestResults 7 | 8 | ## Ignore Visual Studio temporary files, build results, and 9 | ## files generated by popular Visual Studio add-ons. 
10 | 11 | # User-specific files 12 | *.suo 13 | *.user 14 | *.sln.docstates 15 | .idea/ 16 | .DS_Store 17 | 18 | # Build results 19 | [Dd]ebug/ 20 | [Rr]elease/ 21 | x64/ 22 | *_i.c 23 | *_p.c 24 | *.ilk 25 | *.meta 26 | *.obj 27 | *.pch 28 | *.pdb 29 | *.pgc 30 | *.pgd 31 | *.rsp 32 | *.sbr 33 | *.tlb 34 | *.tli 35 | *.tlh 36 | *.tmp 37 | *.log 38 | *.vspscc 39 | *.vssscc 40 | .builds 41 | 42 | # Visual C++ cache files 43 | ipch/ 44 | *.aps 45 | *.ncb 46 | *.opensdf 47 | *.sdf 48 | 49 | # Visual Studio profiler 50 | *.psess 51 | *.vsp 52 | *.vspx 53 | 54 | # Guidance Automation Toolkit 55 | *.gpState 56 | 57 | # ReSharper is a .NET coding add-in 58 | _ReSharper* 59 | 60 | # NCrunch 61 | *.ncrunch* 62 | .*crunch*.local.xml 63 | 64 | # Installshield output folder 65 | [Ee]xpress 66 | 67 | # DocProject is a documentation generator add-in 68 | DocProject/buildhelp/ 69 | DocProject/Help/*.HxT 70 | DocProject/Help/*.HxC 71 | DocProject/Help/*.hhc 72 | DocProject/Help/*.hhk 73 | DocProject/Help/*.hhp 74 | DocProject/Help/Html2 75 | DocProject/Help/html 76 | 77 | # Click-Once directory 78 | publish 79 | 80 | # Publish Web Output 81 | *.Publish.xml 82 | 83 | # NuGet Packages Directory 84 | packages 85 | 86 | # Windows Azure Build Output 87 | csx 88 | *.build.csdef 89 | 90 | # Windows Store app package directory 91 | AppPackages/ 92 | 93 | # Others 94 | [Bb]in 95 | [Oo]bj 96 | sql 97 | TestResults 98 | [Tt]est[Rr]esult* 99 | *.Cache 100 | ClientBin 101 | [Ss]tyle[Cc]op.* 102 | ~$* 103 | *.dbmdl 104 | Generated_Code #added for RIA/Silverlight projects 105 | 106 | # Backup & report files from converting an old project file to a newer 107 | # Visual Studio version. 
Backup files are not needed, because we have git ;-) 108 | _UpgradeReport_Files/ 109 | Backup*/ 110 | UpgradeLog*.XML 111 | 112 | *.nupkg 113 | *.snupkg 114 | .vs 115 | 116 | # GitHub Pages 117 | docs/_site 118 | docs/Gemfile.lock 119 | docs/.jekyll-metadata 120 | 121 | Directory.Build.props 122 | Nuget.Config -------------------------------------------------------------------------------- /docs/examples/relative-distances.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: LINQ to SqlBulkCopy 4 | parent: Examples 5 | nav_order: 1 6 | --- 7 | # LINQ to SqlBulkCopy 8 | 9 | Suppose that for performance reasons you wanted to cache the distances between some entities (such as a store, house or distribution center) and the centroid of every zip code in the U.S. Depending on the number of entities, you could easily produce a very large data set from these calculations. But sure, entity locations and zip code centroids aren't likely to change often enough to warrant computing this result set on every ETL job run, but the real point of this example is to show 10 | 11 | 1. Sometimes our transforms can produce exponentially larger data sets than our source data. In this example, the result set size is (# of Entities × # of Zipcodes). 12 | 2. That those very large data sets can be written to our target data store much faster than by generating an `INSERT` statement for each row. In this example, you'd have to generate (# of Entities × # of Zipcodes) `INSERT` statements which will never perform as well as bulk loading the data instead. 13 | 14 | We start off with this LINQ query that serves as our transform and will produce our large data set. 
15 | 16 | ```csharp 17 | var q = 18 | from entity in GetAllEntities() 19 | where entity.IsActive && SomeOtherPredicate(entity) 20 | from zipCode in GetAllZipCodes() 21 | where zipCode.IsInContiguousStates && SomeOtherPredicate(zipCode) 22 | let distance = GetDistance(entity, zipCode) 23 | let arbitraryData = CreateSomeArbitraryData(entity, zipCode) 24 | where distance > 0 25 | select new EntityToZipCodeDistance 26 | { 27 | EntityId = entity.Id, 28 | ZipCode = zipCode.Zip, 29 | Distance = distance, 30 | ArbitraryData = arbitraryData 31 | }; 32 | ``` 33 | 34 | Note that this LINQ query does not execute until the `MoveNext()` method is called on its enumerator, which will ultimately be called by `SqlBulkCopy`. 35 | 36 | Next, all there is to do is let Bulk Writer write the results to your database table. 37 | 38 | ```csharp 39 | using (var bulkWriter = new BulkWriter(connectionString)) 40 | { 41 | bulkWriter.WriteToDatabase(q); 42 | } 43 | // or async 44 | 45 | using (var bulkWriter = new BulkWriter(connectionString)) 46 | { 47 | await bulkWriter.WriteToDatabaseAsync(q); 48 | } 49 | ``` 50 | -------------------------------------------------------------------------------- /src/BulkWriter/Internal/TypeExtensions.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.ComponentModel.DataAnnotations; 3 | using System.ComponentModel.DataAnnotations.Schema; 4 | using System.Linq; 5 | using System.Reflection; 6 | 7 | namespace BulkWriter.Internal 8 | { 9 | internal static class TypeExtensions 10 | { 11 | public static PropertyMapping[] BuildMappings(this Type type) => 12 | type.GetRuntimeProperties() 13 | .Select((pi, i) => 14 | { 15 | var destinationParam = new DestinationParams(pi, i); 16 | 17 | return new PropertyMapping 18 | { 19 | Source = new MappingSource 20 | { 21 | Property = pi, 22 | Ordinal = i 23 | }, 24 | ShouldMap = pi.GetCustomAttribute() == null, 25 | Destination = new MappingDestination 26 | { 
27 | ColumnName = destinationParam.ColumnName, 28 | ColumnOrdinal = destinationParam.ColumnOrdinal, 29 | ColumnSize = destinationParam.ColumnSize, 30 | DataTypeName = destinationParam.DataTypeName, 31 | IsKey = destinationParam.IsKey 32 | } 33 | }; 34 | }) 35 | .ToArray(); 36 | 37 | internal class DestinationParams 38 | { 39 | public DestinationParams(MemberInfo pi, int index) 40 | { 41 | ColumnOrdinal = index; 42 | ColumnName = pi.Name; 43 | ColumnSize = pi.GetCustomAttribute()?.Length ?? 0; 44 | IsKey = pi.GetCustomAttribute() != null; 45 | 46 | var columnAttribute = pi.GetCustomAttribute(); 47 | 48 | if (columnAttribute != null) 49 | { 50 | ColumnOrdinal = columnAttribute.Order > -1 ? columnAttribute.Order : ColumnOrdinal; 51 | ColumnName = !string.IsNullOrWhiteSpace(columnAttribute.Name) ? columnAttribute.Name : ColumnName; 52 | DataTypeName = columnAttribute.TypeName; 53 | } 54 | } 55 | 56 | public string ColumnName { get; } 57 | public int ColumnOrdinal { get; } 58 | public int ColumnSize { get; } 59 | public string DataTypeName { get; } 60 | public bool IsKey { get; } 61 | } 62 | } 63 | } -------------------------------------------------------------------------------- /src/BulkWriter.Demo/Program.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using Microsoft.Data.SqlClient; 4 | using System.Diagnostics; 5 | using System.Threading.Tasks; 6 | 7 | namespace BulkWriter.Demo 8 | { 9 | internal class Program 10 | { 11 | private static async Task Main(string[] args) 12 | { 13 | SetupDb(); 14 | 15 | var timer = new Stopwatch(); 16 | using (var bulkWriter = new BulkWriter(@"Data Source=.\sqlexpress;Database=BulkWriter.Demo;Trusted_Connection=True;Connection Timeout=300") 17 | { 18 | BulkCopyTimeout = 0, 19 | BatchSize = 10000 20 | }) 21 | { 22 | var items = GetDomainEntities(); 23 | timer.Start(); 24 | await bulkWriter.WriteToDatabaseAsync(items); 25 | timer.Stop(); 26 | 
} 27 | 28 | Console.WriteLine(timer.ElapsedMilliseconds); 29 | Console.ReadKey(); 30 | } 31 | 32 | private static void SetupDb() 33 | { 34 | using (var sqlConnection = new SqlConnection(@"Data Source=.\sqlexpress;Trusted_Connection=True;")) 35 | { 36 | sqlConnection.Open(); 37 | using (var command = new SqlCommand( 38 | @"IF NOT EXISTS (SELECT name FROM master.dbo.sysdatabases WHERE name = N'BulkWriter.Demo') 39 | CREATE DATABASE [BulkWriter.Demo]", sqlConnection)) 40 | { 41 | command.ExecuteNonQuery(); 42 | } 43 | using (var command = new SqlCommand(@"USE [BulkWriter.Demo]; DROP TABLE IF EXISTS dbo.MyDomainEntities", sqlConnection)) 44 | { 45 | command.ExecuteNonQuery(); 46 | } 47 | using (var command = new SqlCommand(@"USE [BulkWriter.Demo]; CREATE TABLE dbo.MyDomainEntities ( 48 | [Id] [int] IDENTITY(1, 1) NOT NULL, 49 | [FirstName] [nvarchar](100), 50 | [LastName] [nvarchar](100), 51 | CONSTRAINT [PK_MyDomainEntities] PRIMARY KEY CLUSTERED ( [Id] ASC ) 52 | )", sqlConnection)) 53 | { 54 | command.ExecuteNonQuery(); 55 | } 56 | } 57 | } 58 | 59 | private static IEnumerable GetDomainEntities() 60 | { 61 | for (var i = 0; i < 10000000; i++) 62 | { 63 | yield return new MyDomainEntity 64 | { 65 | Id = i, 66 | FirstName = $"Bob-{i}", 67 | LastName = $"Smith-{i}" 68 | }; 69 | } 70 | } 71 | 72 | 73 | } 74 | } -------------------------------------------------------------------------------- /docs/examples/pipelining.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Pipelining 4 | parent: Examples 5 | nav_order: 2 6 | --- 7 | # Pipelining 8 | 9 | This example manually implements a basic pipeline as described in the [Pipelining Overview](../pipelining.md#pipelining). You can see the `IEnumerable` objects being chained together through each method call. 
Since we never trigger evaluation of the `IEnumerable` objects until the call to `BulkWriter.WriteToDatabase()`, we don't end up waiting on or using up memory for all 1,000,000 objects before we write to the database via `SqlBulkCopy`. 10 | 11 | This code effectively implements the same pipeline as the [Advanced Pipelining](./advanced-pipelining.md) example, except we don't have buffers between each step, as shown in the [Pipelining Overview](../pipelining.md) 12 | 13 | ```csharp 14 | public class MyEntity 15 | { 16 | public int Id { get; set; } 17 | public string Name { get; set; } 18 | } 19 | 20 | public class MyOtherEntity 21 | { 22 | public int Id { get; set; } 23 | public string FirstName { get; set; } 24 | public string LastName { get; set; } 25 | } 26 | 27 | var entities = GetEntities(); 28 | var maxId = GetMaxId(entities); 29 | var bobObjects = CreateBobObjects(maxId); 30 | var otherBobObjects = ProjectBobObjects(bobObjects); 31 | var aliceObjects = TransformToAliceObjects(otherBobObjects); 32 | WriteToDatabase(aliceObjects); 33 | 34 | // Or a one-liner!! 
35 | // WriteToDatabase(TransformToAliceObjects(ProjectBobObjects(CreateBobObjects(GetMaxId(GetEntities()))))); 36 | 37 | private static IEnumerable GetEntities() 38 | { 39 | foreach (var item in Enumerable.Range(0, 1000000)) 40 | { 41 | yield return new MyEntity { Id = item, Name = "Carol" }; 42 | } 43 | } 44 | 45 | private static int GetMaxId(IEnumerable input) 46 | { 47 | return input.Max(i => i.Id); 48 | } 49 | 50 | private static IEnumerable CreateBobObjects(int numberOfBobs) 51 | { 52 | for (var j = 1; j <= numberOfBobs; j++) 53 | { 54 | yield return new MyEntity { Id = j, Name = $"Bob {j}" }; 55 | } 56 | } 57 | 58 | private static IEnumerable ProjectBobObjects(IEnumerable input) 59 | { 60 | foreach (var item in input) 61 | { 62 | var nameParts = item.Name.Split(' '); 63 | yield return new MyOtherEntity {Id = item.Id, FirstName = nameParts[0], LastName = nameParts[1] }; 64 | } 65 | } 66 | 67 | private static IEnumerable TransformToAliceObjects(IEnumerable input) 68 | { 69 | foreach (var item in input) 70 | { 71 | yield return new MyOtherEntity {Id = item.Id, FirstName = "Alice", LastName = item.LastName }; 72 | } 73 | } 74 | 75 | private static void WriteToDatabase(IEnumerable input) 76 | { 77 | using (var bulkWriter = new BulkWriter(connectionString)) 78 | { 79 | bulkWriter.WriteToDatabase(input); 80 | } 81 | } 82 | ``` 83 | -------------------------------------------------------------------------------- /src/BulkWriter/Pipeline/EtlPipeline.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | using System.Linq; 3 | using System.Threading; 4 | using System.Threading.Tasks; 5 | using BulkWriter.Pipeline.Internal; 6 | using BulkWriter.Pipeline.Steps; 7 | 8 | namespace BulkWriter.Pipeline 9 | { 10 | /// 11 | /// Implements an ETL pipeline that ultimately writes to a BulkWriter object 12 | /// 13 | /// 14 | public sealed class EtlPipeline : IEtlPipeline 15 | { 16 | private readonly List 
_pipelineSteps = new List(); 17 | 18 | private EtlPipeline() 19 | { 20 | } 21 | 22 | public Task ExecuteAsync() 23 | { 24 | return ExecuteAsync(CancellationToken.None); 25 | } 26 | 27 | public Task ExecuteAsync(CancellationToken cancellationToken) 28 | { 29 | var pipelineTasks = _pipelineSteps.Select(s => s.Run(cancellationToken)); 30 | return Task.WhenAll(pipelineTasks); 31 | } 32 | 33 | private void AddStep(IEtlPipelineStep etlPipelineStep) 34 | { 35 | _pipelineSteps.Add(etlPipelineStep); 36 | } 37 | 38 | /// 39 | /// Begins configuration of a new EtlPipeline 40 | /// 41 | /// Type of input objects to the pipeline 42 | /// An enumerable with input objects for the pipeline 43 | /// Object for continuation of pipeline configuration 44 | public static IEtlPipelineStep StartWith(IEnumerable input) 45 | { 46 | var pipeline = new EtlPipeline(); 47 | var etlPipelineSetupContext = new EtlPipelineContext(pipeline, s => pipeline.AddStep(s)); 48 | var step = new StartEtlPipelineStep(etlPipelineSetupContext, input); 49 | 50 | etlPipelineSetupContext.AddStep(step); 51 | 52 | return step; 53 | } 54 | 55 | #if NETSTANDARD2_1_OR_GREATER || NET6_0_OR_GREATER 56 | /// 57 | /// Begins configuration of a new EtlPipeline 58 | /// 59 | /// Type of input objects to the pipeline 60 | /// An async enumerable with input objects for the pipeline 61 | /// Object for continuation of pipeline configuration 62 | public static IEtlPipelineStep StartWith(IAsyncEnumerable input) 63 | { 64 | var pipeline = new EtlPipeline(); 65 | var etlPipelineSetupContext = new EtlPipelineContext(pipeline, s => pipeline.AddStep(s)); 66 | var step = new AsyncStartEtlPipelineStep(etlPipelineSetupContext, input); 67 | 68 | etlPipelineSetupContext.AddStep(step); 69 | 70 | return step; 71 | } 72 | #endif 73 | 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/BulkWriter.Tests/DbContainerFixture.cs: 
-------------------------------------------------------------------------------- 1 | using Microsoft.Data.SqlClient; 2 | using System.Threading.Tasks; 3 | using Testcontainers.MsSql; 4 | using Xunit; 5 | 6 | namespace BulkWriter.Tests; 7 | 8 | 9 | [CollectionDefinition(nameof(DbContainerFixture))] 10 | public class DbContainerFixtureCollection : ICollectionFixture { } 11 | 12 | public class DbContainerFixture : IAsyncLifetime 13 | { 14 | public MsSqlContainer SqlContainer { get; } = new MsSqlBuilder().Build(); 15 | public string TestConnectionString { get; private set; } 16 | 17 | public async Task InitializeAsync() 18 | { 19 | await SqlContainer.StartAsync(); 20 | 21 | ExecuteNonQuery(SqlContainer.GetConnectionString(), @"IF NOT EXISTS (SELECT name FROM master.dbo.sysdatabases WHERE name = N'BulkWriter.Tests') 22 | CREATE DATABASE [BulkWriter.Tests]"); 23 | 24 | var builder = new SqlConnectionStringBuilder(SqlContainer.GetConnectionString()) 25 | { 26 | InitialCatalog = "BulkWriter.Tests" 27 | }; 28 | 29 | TestConnectionString = builder.ToString(); 30 | } 31 | 32 | public Task DisposeAsync() 33 | => SqlContainer.DisposeAsync().AsTask(); 34 | 35 | public void ExecuteNonQuery(string commandText) => ExecuteNonQuery(TestConnectionString, commandText); 36 | 37 | public void ExecuteNonQuery(string connectionString, string commandText) 38 | { 39 | using (var sqlConnection = new SqlConnection(connectionString)) 40 | { 41 | using (var command = new SqlCommand(commandText, sqlConnection)) 42 | { 43 | sqlConnection.Open(); 44 | command.ExecuteNonQuery(); 45 | } 46 | } 47 | } 48 | 49 | public Task ExecuteScalar(string commandText) => ExecuteScalar(TestConnectionString, commandText); 50 | 51 | public async Task ExecuteScalar(string connectionString, string commandText) 52 | { 53 | using (var sqlConnection = new SqlConnection(connectionString)) 54 | { 55 | await sqlConnection.OpenAsync(); 56 | return await ExecuteScalar(sqlConnection, commandText); 57 | } 58 | } 59 | 60 | 
public async Task ExecuteScalar(SqlConnection sqlConnection, string commandText, SqlTransaction transaction = null) 61 | { 62 | using (var command = new SqlCommand(commandText, sqlConnection, transaction)) 63 | { 64 | return await command.ExecuteScalarAsync(); 65 | } 66 | } 67 | 68 | public string DropCreate(string tableName) 69 | { 70 | ExecuteNonQuery(TestConnectionString, $"DROP TABLE IF EXISTS [dbo].[{tableName}]"); 71 | 72 | ExecuteNonQuery(TestConnectionString, 73 | "CREATE TABLE [dbo].[" + tableName + "](" + 74 | "[Id] [int] IDENTITY(1,1) NOT NULL," + 75 | "[Name] [nvarchar](50) NULL," + 76 | "CONSTRAINT [PK_" + tableName + "] PRIMARY KEY CLUSTERED ([Id] ASC)" + 77 | ")"); 78 | 79 | return tableName; 80 | } 81 | 82 | } 83 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to contribute 2 | 3 | The easiest way to contribute is to open an issue and start a discussion. 4 | Then we can decide if and how a feature or a change could be implemented and if you should submit a pull requests with code changes. 5 | 6 | Also read this first: [Being a good open source citizen](https://hackernoon.com/being-a-good-open-source-citizen-9060d0ab9732#.x3hocgw85) 7 | 8 | ## General feedback and discussions 9 | Please start a discussion on the [issue tracker](https://github.com/HeadspringLabs/bulk-writer/issues). 10 | 11 | ## Platform 12 | BulkWriter is built using the RTM tooling that ships with Visual Studio 2019. This is the only configuration accepted. 13 | 14 | ## Building 15 | Run `.\setup.ps1` from the command line for the initial setup. 16 | 17 | Run `.\psake.cmd` to build and test the project. 18 | 19 | ## Bugs and feature requests? 20 | Please log a new issue on the [issue tracker](https://github.com/HeadspringLabs/bulk-writer/issues). 
21 | 22 | ## Filing issues 23 | The best way to get your bug fixed is to be as detailed as you can be about the problem. 24 | Providing a minimal project with steps to reproduce the problem is ideal. 25 | Here are questions you can answer before you file a bug to make sure you're not missing any important information. 26 | 27 | 1. Did you read the [documentation](https://github.com/HeadspringLabs/bulk-writer/)? 28 | 2. Did you include the snippet of broken code in the issue? 29 | 3. What are the *EXACT* steps to reproduce this problem? 30 | 31 | GitHub supports [markdown](https://github.github.com/github-flavored-markdown/), so when filing bugs make sure you check the formatting before clicking submit. 32 | 33 | ## Contributing code and content 34 | Make sure you can build the code. Familiarize yourself with the project workflow and our coding conventions. If you don't know what a pull request is read this article: https://help.github.com/articles/using-pull-requests. 35 | 36 | **We only accept PRs to the main branch.** 37 | 38 | Before submitting a feature or substantial code contribution please discuss it with the team and ensure it follows the product roadmap. Here's a list of blog posts that are worth reading before doing a pull request: 39 | 40 | * [Open Source Contribution Etiquette](http://tirania.org/blog/archive/2010/Dec-31.html) by Miguel de Icaza 41 | * [Don't "Push" Your Pull Requests](http://www.igvita.com/2011/12/19/dont-push-your-pull-requests/) by Ilya Grigorik. 
42 | * [10 tips for better Pull Requests](http://blog.ploeh.dk/2015/01/15/10-tips-for-better-pull-requests/) by Mark Seemann 43 | * [How to write the perfect pull request](https://github.com/blog/1943-how-to-write-the-perfect-pull-request) by GitHub 44 | 45 | Here's a few things you should always do when making changes to the code base: 46 | 47 | **Commit/Pull Request Format** 48 | 49 | ``` 50 | Summary of the changes (Less than 80 chars) 51 | - Detail 1 52 | - Detail 2 53 | 54 | #bugnumber (in this specific format) 55 | ``` 56 | 57 | **Tests** 58 | 59 | - Tests need to be provided for every bug/feature that is completed. 60 | - Tests only need to be present for issues that need to be verified by QA (e.g. not tasks). 61 | - If there is a scenario that is far too hard to test there does not need to be a test for it. 62 | - "Too hard" is determined by the team as a whole. 63 | -------------------------------------------------------------------------------- /docs/bulkinserting.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Bulk Inserting 4 | parent: Overview 5 | nav_order: 3 6 | --- 7 | # Bulk Inserting 8 | 9 | There are a number of approaches to inserting large amounts of data into the database, including batching inserts, table value parameters, and `SqlBulkCopy`. Each of these approaches is valid for certain use cases (see [https://docs.microsoft.com/en-us/azure/sql-database/sql-database-use-batching-to-improve-performance](https://docs.microsoft.com/en-us/azure/sql-database/sql-database-use-batching-to-improve-performance) for a good breakdown). This library was written with very large data sets in mind, where we have many thousands of records (or more) to insert all at once. 
10 | 11 | ## SqlBulkCopy 12 | 13 | We use `SqlBulkCopy` (or the Oracle or MySql equivalents) in *Bulk Writer* as it typically provides the best throughput for very large record sets, but its `WriteToServer()` methods take a `DataRow[]`, a `DataTable` or an `IDataReader`. In order to keep our process memory efficient, we'd like to be able to stream our data set into the data store. 14 | 15 | The methods that take a `DataRow[]` or a `DataTable` aren't really useful when transforms produce very large data sets because they force us to load the entire data set into memory before writing into the target data store. 16 | 17 | That leaves us to examine how to leverage the `WriteToServer(IDataReader)` method. If you think about how `IDataReader` works, users of an `IDataReader` instance must call the `Read()` method before examining a current record. A user advances through the result set until `Read()` returns false, at which point the stream is finished and there is no longer a current record. It has no concept of a previous record. In this way, `IDataReader` is a *non-caching forward-only reader*. 18 | 19 | There are other non-caching forward-only readers in .NET, which are used every day by most developers. The most-used example of this type of reader is `IEnumerator`, which works similarly to `IDataReader`. However, instead of a `bool Read()` method, `IEnumerator` has a `bool MoveNext()` method. Conceptually, `IDataReader` and `IEnumerator` are similar. 20 | 21 | ## Streaming data to SqlBulkCopy 22 | 23 | The Bulk Writer core assembly has an `IDataReader` implementation that wraps an `IEnumerator` called `EnumerableDataReader`. An instance of `EnumerableDataReader` is passed to an instance of `SqlBulkCopy`, so that when `SqlBulkCopy` calls for the next record from the `EnumerableDataReader` instance, it is retrieving the next record from the underlying `IEnumerator`. 
24 | 25 | It is conceivable that `IEnumerator.MoveNext()` and `IEnumerator.Current` are proffering records from any type of data source, but you are typically enumerating over an enumerable by retrieving an instance of `IEnumerator` by calling `IEnumerable.GetEnumerator()`. So, you can think of `EnumerableDataReader` in this way: 26 | 27 | > **`EnumerableDataReader` is given to `SqlBulkCopy`, and in turn, `SqlBulkCopy` will stream the data from the `IEnumerable` into your target data store.** 28 | 29 | This technique does require you to reason differently about your ETL jobs: Most jobs *push* data into the target data store. This technique requires you to think about how to structure your transforms so that data is *pulled* from your source data through your transforms instead. 30 | 31 | It is technically possible to produce infinite data sets with `IEnumerable`, which can be pulled into a SQL Server table as soon as your `IEnumerable` can produce them while using very little memory. 32 | -------------------------------------------------------------------------------- /docs/_sass/custom/custom.scss: -------------------------------------------------------------------------------- 1 | //// 2 | //// Typography 3 | //// 4 | 5 | //$body-font-family: -apple-system, BlinkMacSystemFont, "helvetica neue", helvetica, roboto, noto, "segoe ui", arial, sans-serif; 6 | //$mono-font-family: "SFMono-Regular", Menlo, Consolas, Monospace; 7 | //$root-font-size: 16px; // Base font-size for rems 8 | //$body-line-height: 1.4; 9 | //$content-line-height: 1.5; 10 | //$body-heading-line-height: 1.15; 11 | 12 | //// 13 | //// Colors 14 | //// 15 | 16 | //$white: #fff; 17 | 18 | //$grey-dk-000: #959396; 19 | //$grey-dk-100: #5c5962; 20 | //$grey-dk-200: #44434d; 21 | //$grey-dk-250: #302d36; 22 | //$grey-dk-300: #27262b; 23 | 24 | //$grey-lt-000: #f5f6fa; 25 | //$grey-lt-100: #eeebee; 26 | //$grey-lt-200: #ecebed; 27 | //$grey-lt-300: #e6e1e8; 28 | 29 | //$purple-000: #7253ed; 30 | 
//$purple-100: #5e41d0; 31 | //$purple-200: #4e26af; 32 | //$purple-300: #381885; 33 | 34 | //$blue-000: #2c84fa; 35 | //$blue-100: #2869e6; 36 | //$blue-200: #264caf; 37 | //$blue-300: #183385; 38 | 39 | //$green-000: #41d693; 40 | //$green-100: #11b584; 41 | //$green-200: #009c7b; 42 | //$green-300: #026e57; 43 | 44 | //$yellow-000: #ffeb82; 45 | //$yellow-100: #fadf50; 46 | //$yellow-200: #f7d12e; 47 | //$yellow-300: #e7af06; 48 | 49 | //$red-000: #f77e7e; 50 | //$red-100: #f96e65; 51 | //$red-200: #e94c4c; 52 | //$red-300: #dd2e2e; 53 | 54 | //$body-background-color: $white; 55 | //$sidebar-color: $grey-lt-000; 56 | //$search-background-color: $white; 57 | //$table-background-color: $white; 58 | //$code-background-color: $grey-lt-000; 59 | 60 | //$body-text-color: $grey-dk-100; 61 | //$body-heading-color: $grey-dk-300; 62 | //$search-result-preview-color: $grey-dk-000; 63 | //$nav-child-link-color: $grey-dk-100; 64 | $link-color: #0079C1; 65 | $btn-primary-color: #0079C1; 66 | //$base-button-color: #f7f7f7; 67 | 68 | //// 69 | //// Spacing 70 | //// 71 | 72 | //$spacing-unit: 1rem; // 1rem == 16px 73 | 74 | //$spacers: ( 75 | //sp-0: 0, 76 | //sp-1: $spacing-unit * 0.25, 77 | //sp-2: $spacing-unit * 0.5, 78 | //sp-3: $spacing-unit * 0.75, 79 | //sp-4: $spacing-unit, 80 | //sp-5: $spacing-unit * 1.5, 81 | //sp-6: $spacing-unit * 2, 82 | //sp-7: $spacing-unit * 2.5, 83 | //sp-8: $spacing-unit * 3, 84 | //sp-9: $spacing-unit * 3.5, 85 | //sp-10: $spacing-unit * 4 86 | //); 87 | 88 | //$sp-1: map-get($spacers, sp-1); // 0.25 rem == 4px 89 | //$sp-2: map-get($spacers, sp-2); // 0.5 rem == 8px 90 | //$sp-3: map-get($spacers, sp-3); // 0.75 rem == 12px 91 | //$sp-4: map-get($spacers, sp-4); // 1 rem == 16px 92 | //$sp-5: map-get($spacers, sp-5); // 1.5 rem == 24px 93 | //$sp-6: map-get($spacers, sp-6); // 2 rem == 32px 94 | //$sp-7: map-get($spacers, sp-7); // 2.5 rem == 40px 95 | //$sp-8: map-get($spacers, sp-8); // 3 rem == 48px 96 | //$sp-9: map-get($spacers, 
sp-9); // 4 rem == 48px 97 | //$sp-10: map-get($spacers, sp-10); // 4.5 rem == 48px 98 | 99 | //// 100 | //// Borders 101 | //// 102 | 103 | //$border: 1px solid; 104 | //$border-radius: 4px; 105 | //$border-color: $grey-lt-100; 106 | 107 | //// 108 | //// Grid system 109 | //// 110 | 111 | //$gutter-spacing: $sp-6; 112 | //$gutter-spacing-sm: $sp-4; 113 | //$nav-width: 264px; 114 | //$nav-width-md: 248px; 115 | //$content-width: 800px; 116 | //$header-height: 60px; 117 | //$search-results-width: 500px; 118 | 119 | //// 120 | //// Media queries in pixels 121 | //// 122 | 123 | //$media-queries: ( 124 | //xs: 320px, 125 | //sm: 500px, 126 | //md: $content-width, 127 | //lg: $content-width + $nav-width, 128 | //xl: 1400px 129 | //); 130 | -------------------------------------------------------------------------------- /src/BulkWriter.Tests/TypeExtensionsTests.cs: -------------------------------------------------------------------------------- 1 | using System.ComponentModel.DataAnnotations; 2 | using System.ComponentModel.DataAnnotations.Schema; 3 | using System.Linq; 4 | using BulkWriter.Internal; 5 | using Xunit; 6 | 7 | namespace BulkWriter.Tests 8 | { 9 | public class AnnotatedEntity 10 | { 11 | public int Id { get; private set; } 12 | 13 | [Column("StringProperty")] 14 | public string MappedStringProperty { get; set; } 15 | 16 | [NotMapped] 17 | public decimal NotMappedDecimalProperty { get; private set; } 18 | 19 | } 20 | 21 | [Table("MyTable")] 22 | public class MyTestClass 23 | { 24 | [Column("CustomId")] 25 | [Key] 26 | public int Id { get; set; } 27 | 28 | [Column("CustomName")] 29 | public string Name { get; set; } 30 | 31 | [NotMapped] 32 | public string IgnoredColumn { get; set; } 33 | } 34 | 35 | public class TypeExtensionsTests 36 | { 37 | [Fact] 38 | public void Verify_Build_Mappings_Annotations() 39 | { 40 | var mappings = typeof(MyTestClass).BuildMappings(); 41 | 42 | Assert.Equal(3, mappings.Length); 43 | 44 | var idMap = mappings.Single(m => 
m.Source.Property.Name == nameof(MyTestClass.Id)); 45 | Assert.Equal("CustomId", idMap.Destination.ColumnName); 46 | Assert.Equal(0, idMap.Destination.ColumnOrdinal); 47 | Assert.True(idMap.Destination.IsKey); 48 | Assert.Equal(0, idMap.Source.Ordinal); 49 | Assert.Equal(nameof(MyTestClass.Id), idMap.Source.Property.Name); 50 | Assert.True(idMap.ShouldMap); 51 | 52 | var nameMap = mappings.Single(m => m.Source.Property.Name == nameof(MyTestClass.Name)); 53 | Assert.Equal("CustomName", nameMap.Destination.ColumnName); 54 | Assert.Equal(1, nameMap.Destination.ColumnOrdinal); 55 | Assert.False(nameMap.Destination.IsKey); 56 | Assert.Equal(1, nameMap.Source.Ordinal); 57 | Assert.Equal(nameof(MyTestClass.Name), nameMap.Source.Property.Name); 58 | Assert.True(nameMap.ShouldMap); 59 | 60 | var ignoreColumnMap = mappings.Single(m => m.Source.Property.Name == nameof(MyTestClass.IgnoredColumn)); 61 | Assert.Equal(nameof(MyTestClass.IgnoredColumn), ignoreColumnMap.Destination.ColumnName); 62 | Assert.Equal(2, ignoreColumnMap.Destination.ColumnOrdinal); 63 | Assert.False(ignoreColumnMap.Destination.IsKey); 64 | Assert.Equal(2, ignoreColumnMap.Source.Ordinal); 65 | Assert.Equal(nameof(MyTestClass.IgnoredColumn), ignoreColumnMap.Source.Property.Name); 66 | Assert.False(ignoreColumnMap.ShouldMap); 67 | } 68 | 69 | [Fact] 70 | public void Not_Mapped_Attribute_Should_Not_Map() 71 | { 72 | var propertyMappings = typeof(AnnotatedEntity).BuildMappings(); 73 | var notMappedProperty = propertyMappings.Single(x => x.Source.Property.Name == nameof(AnnotatedEntity.NotMappedDecimalProperty)); 74 | 75 | Assert.False(notMappedProperty.ShouldMap); 76 | } 77 | 78 | [Fact] 79 | public void Column_Attribute_Should_Map_With_Proper_Source_And_Destination() 80 | { 81 | var propertyMappings = typeof(AnnotatedEntity).BuildMappings(); 82 | var mappedProperty = propertyMappings.Single(x => x.Source.Property.Name == nameof(AnnotatedEntity.MappedStringProperty)); 83 | 84 | 
Assert.True(mappedProperty.Destination.ColumnName == "StringProperty"); 85 | Assert.True(mappedProperty.ShouldMap); 86 | } 87 | } 88 | } -------------------------------------------------------------------------------- /BulkWriter.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.29509.3 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BulkWriter", "src\BulkWriter\BulkWriter.csproj", "{A232D2EA-6E54-4941-9496-4DB98122B14C}" 7 | EndProject 8 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BulkWriter.Demo", "src\BulkWriter.Demo\BulkWriter.Demo.csproj", "{09FA9E48-9696-4E1E-A1DB-B4D8ACE96B18}" 9 | EndProject 10 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BulkWriter.Tests", "src\BulkWriter.Tests\BulkWriter.Tests.csproj", "{6DAD0F01-655C-49BF-924F-01C8ED7CA62C}" 11 | EndProject 12 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Build", "Build", "{432DE5FB-310C-432E-8ADE-ED6FCBD45C18}" 13 | ProjectSection(SolutionItems) = preProject 14 | setup.ps1 = setup.ps1 15 | Build.ps1 = Build.ps1 16 | Push.ps1 = Push.ps1 17 | EndProjectSection 18 | EndProject 19 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BulkWriter.Benchmark", "src\BulkWriter.Benchmark\BulkWriter.Benchmark.csproj", "{C39F623C-7958-427E-B5E3-2CE704A69B85}" 20 | EndProject 21 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{95C9E859-9C1F-44C1-B38A-7383439785A0}" 22 | ProjectSection(SolutionItems) = preProject 23 | .editorconfig = .editorconfig 24 | README.md = README.md 25 | .github\workflows\ci.yml = .github\workflows\ci.yml 26 | .github\workflows\triage-issues.yml = .github\workflows\triage-issues.yml 27 | .github\workflows\devskim.yml = .github\workflows\devskim.yml 28 | .github\workflows\release.yml = 
.github\workflows\release.yml 29 | .gitignore = .gitignore 30 | Directory.Build.props = Directory.Build.props 31 | Directory.Build.targets = Directory.Build.targets 32 | EndProjectSection 33 | EndProject 34 | Global 35 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 36 | Debug|Any CPU = Debug|Any CPU 37 | Release|Any CPU = Release|Any CPU 38 | EndGlobalSection 39 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 40 | {A232D2EA-6E54-4941-9496-4DB98122B14C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 41 | {A232D2EA-6E54-4941-9496-4DB98122B14C}.Debug|Any CPU.Build.0 = Debug|Any CPU 42 | {A232D2EA-6E54-4941-9496-4DB98122B14C}.Release|Any CPU.ActiveCfg = Release|Any CPU 43 | {A232D2EA-6E54-4941-9496-4DB98122B14C}.Release|Any CPU.Build.0 = Release|Any CPU 44 | {09FA9E48-9696-4E1E-A1DB-B4D8ACE96B18}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 45 | {09FA9E48-9696-4E1E-A1DB-B4D8ACE96B18}.Debug|Any CPU.Build.0 = Debug|Any CPU 46 | {09FA9E48-9696-4E1E-A1DB-B4D8ACE96B18}.Release|Any CPU.ActiveCfg = Release|Any CPU 47 | {09FA9E48-9696-4E1E-A1DB-B4D8ACE96B18}.Release|Any CPU.Build.0 = Release|Any CPU 48 | {6DAD0F01-655C-49BF-924F-01C8ED7CA62C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 49 | {6DAD0F01-655C-49BF-924F-01C8ED7CA62C}.Debug|Any CPU.Build.0 = Debug|Any CPU 50 | {6DAD0F01-655C-49BF-924F-01C8ED7CA62C}.Release|Any CPU.ActiveCfg = Release|Any CPU 51 | {6DAD0F01-655C-49BF-924F-01C8ED7CA62C}.Release|Any CPU.Build.0 = Release|Any CPU 52 | {C39F623C-7958-427E-B5E3-2CE704A69B85}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 53 | {C39F623C-7958-427E-B5E3-2CE704A69B85}.Debug|Any CPU.Build.0 = Debug|Any CPU 54 | {C39F623C-7958-427E-B5E3-2CE704A69B85}.Release|Any CPU.ActiveCfg = Release|Any CPU 55 | {C39F623C-7958-427E-B5E3-2CE704A69B85}.Release|Any CPU.Build.0 = Release|Any CPU 56 | EndGlobalSection 57 | GlobalSection(SolutionProperties) = preSolution 58 | HideSolutionNode = FALSE 59 | EndGlobalSection 60 | GlobalSection(ExtensibilityGlobals) = postSolution 61 | 
SolutionGuid = {ED2FB9A8-E280-4ABA-9DC3-E19A8F9F669A} 62 | EndGlobalSection 63 | EndGlobal 64 | -------------------------------------------------------------------------------- /docs/features/bulkwriter.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: BulkWriter 4 | parent: Features 5 | nav_order: 1 6 | --- 7 | # BulkWriter 8 | 9 | The `BulkWriter` class is (naturally) the main attraction of this library. It implements the streaming from `IEnumerable` or `IAsyncEnumerable` to `SqlBulkCopy`, as described in [Motivation](../motivation.md). The class will automatically handle mapping of your DTOs, so the only configuration you need to provide is a database connection (or connection string) and an input `IEnumerable` or `IAsyncEnumerable`. 10 | 11 | The following constructors are provided: 12 | 13 | ```csharp 14 | public BulkWriter(string connectionString) 15 | public BulkWriter(string connectionString, SqlBulkCopyOptions options) 16 | public BulkWriter(SqlConnection connection, SqlTransaction transaction = null) 17 | public BulkWriter(SqlConnection connection, SqlBulkCopyOptions options, SqlTransaction transaction = null) 18 | ``` 19 | 20 | **Note:** `BulkWriter` is `IDisposable` and *must* be disposed properly! 21 | 22 | ## Entity Mapping 23 | 24 | `BulkWriter` uses standard Data Annotations from the `System.ComponentModel.DataAnnotations.Schema` [namespace](https://docs.microsoft.com/en-us/dotnet/api/system.componentmodel.dataannotations.schema?view=netcore-3.1) to map your DTO to tables and columns. In the absence of attributes on a property or class, the name of that property or class is used when mapping to your output table. 
25 | 26 | The following attributes are supported: 27 | 28 | - TableAttribute 29 | - ColumnAttribute 30 | - KeyAttribute 31 | - MaxLengthAttribute 32 | - NotMappedAttribute 33 | 34 | ## Example 35 | 36 | ```csharp 37 | [Table("MyTable")] 38 | public class MyClass 39 | { 40 | [Key] 41 | public int Id { get; set; } 42 | 43 | [MaxLength(100)] 44 | public string Name { get; set; } 45 | 46 | public decimal WeightInKg { get; set; } 47 | 48 | [NotMapped] 49 | public bool CommunicatesSecurely { get; set; } 50 | } 51 | 52 | //using connection string 53 | using (var writer = new BulkWriter(connectionString)) 54 | { 55 | var items = Enumerable.Range(1, 1000).Select(i => new MyClass { Id = i, Name = "Bob", WeightInKg = 80, CommunicatesSecurely = true }); 56 | writer.WriteToDatabase(items); 57 | } 58 | 59 | //with custom SqlBulkCopyOptions 60 | var bulkCopyOptions = new SqlBulkCopyOptions 61 | { 62 | BulkCopyTimeout = 0, 63 | BatchSize = 10000 64 | }; 65 | using (var bulkWriter = new BulkWriter(connectionString, bulkCopyOptions)) 66 | { 67 | var items = Enumerable.Range(1, 1000000).Select(i => new MyClass { Id = i, Name = "Bob", WeightInKg = 80, CommunicatesSecurely = true }); 68 | bulkWriter.WriteToDatabase(items); 69 | } 70 | 71 | //using direct SQL connection 72 | using (var connection = new SqlConnection(_connectionString)) 73 | { 74 | var items = Enumerable.Range(1, 1000).Select(i => new MyClass { Id = i, Name = "Bob", WeightInKg = 80, CommunicatesSecurely = true }); 75 | await connection.OpenAsync(); 76 | 77 | using var writer = new BulkWriter(connection); 78 | writer.WriteToDatabase(items); 79 | } 80 | 81 | //inside a transaction 82 | var bulkCopyOptions = new SqlBulkCopyOptions 83 | { 84 | BulkCopyTimeout = 0, 85 | BatchSize = 10000 86 | }; 87 | using (var connection = new SqlConnection(_connectionString)) 88 | { 89 | var items = Enumerable.Range(1, 1000000).Select(i => new MyClass { Id = i, Name = "Bob", WeightInKg = 80, CommunicatesSecurely = true }); 90 | await
connection.OpenAsync(); 91 | 92 | using var transaction = connection.BeginTransaction(); 93 | using var writer = new BulkWriter(connection, bulkCopyOptions, transaction); 94 | await writer.WriteToDatabaseAsync(items); 95 | } 96 | ``` 97 | -------------------------------------------------------------------------------- /docs/pipelining.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Pipelining 4 | parent: Overview 5 | nav_order: 4 6 | --- 7 | # Pipelining 8 | 9 | Some ETL jobs require transforms that are so complex (and the code is so complicated), it's easier to reason about and implement the transforms in steps. You can create these steps using a single LINQ query (which are actually pipelines themselves), or you can implement a Pipeline with Stages. 10 | 11 | Typical pipelines push data from one stage to the next. 12 | 13 | ========= ========= ========= ========== 14 | | | | | | | | | 15 | Push -> | Stage | Push -> | Stage | Push -> | Stage | Push -> | Sink | 16 | | | | | | | | | 17 | ========= ========= ========= ========== 18 | 19 | Using *Bulk Writer*, data is *pulled* through the Pipeline. 20 | 21 | ========= ========= ========= ========== 22 | | | | | | | | | 23 | | Stage | <- Pull | Stage | <- Pull | Stage | <- Pull | Sink | 24 | | | | | | | | | 25 | ========= ========= ========= ========== 26 | 27 | See the [basic pipelining](./examples/pipelining.md) code for an example implementation. 28 | 29 | # Advanced Pipelining 30 | 31 | The example above is fine, but we're only processing one source data item at a time. If one pipeline stage takes longer to produce output than other stages, all stage processing suffers. There are times when you'd like any pipeline stage to continue to process available items even if other stages in the pipeline are blocked. For an example, let's consider a segment of a pipeline comprised of two stages. 
32 | 33 | Suppose Stage 1 was IO-bound because it queried for and produced a result set for each of its input items. In other words, Stage 1 is producing a larger data set than its input and it may be spending a lot of its time waiting. 34 | 35 | Next, suppose Stage 2 was CPU-bound because it performed hundreds of calculations on each output produced by Stage 1. In this example, there's no reason why Stage 2 shouldn't be able to perform its calculations while Stage 1 is blocked or producing input for Stage 2. 36 | 37 | We form a pipeline like this by running each pipeline stage on its own thread and by introducing an input and output buffer between each stage. Now, instead of a pipeline stage pulling directly from the previous stage, each pipeline stage pushes to and pulls from its input and output buffer, respectively. 38 | 39 | Such a pipeline would look like this: 40 | 41 | === === === 42 | | | ========= | | ========= | | ========== 43 | === | | === | | === | | 44 | Push -> | | <- Pull | Stage | Push -> | | <- Pull | Stage | Push -> | | <- Pull | Sink | 45 | === | | === | | === | | 46 | | | ========= | | ========= | | ========== 47 | === === === 48 | Buffer Buffer Buffer 49 | 50 | Since each stage is running on its own thread, we need to be careful so that the stage's thread doesn't end before all the items the pipeline needs to process have been pushed through. So, we'll need a way to indicate that previous stages have finished. 51 | 52 | This pipeline structure is precisely what's implemented by the `EtlPipeline` [class](./features/etlpipeline.md). See the [advanced pipelining](./examples/advanced-pipelining.md) code for an example implementation.
53 | -------------------------------------------------------------------------------- /src/BulkWriter.Tests/PropertyInfoExtensionsTests.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Reflection; 3 | using BulkWriter.Internal; 4 | using Xunit; 5 | 6 | namespace BulkWriter.Tests 7 | { 8 | 9 | public class PropertyInfoExtensionsTests 10 | { 11 | [Fact] 12 | public void Can_Get_Correct_ValueType_Property_Value() 13 | { 14 | var valueTypeProperty = typeof (MyTestClass).GetProperty("ValueTypeProperty", BindingFlags.Public | BindingFlags.Instance); 15 | Assert.NotNull(valueTypeProperty); 16 | var valueTypePropertyValueGetter = valueTypeProperty.GetValueGetter(); 17 | Assert.NotNull(valueTypePropertyValueGetter); 18 | 19 | var testClass = new MyTestClass(); 20 | 21 | var zeroValue = valueTypePropertyValueGetter(testClass); 22 | Assert.IsType(zeroValue); 23 | Assert.Equal(0, zeroValue); 24 | 25 | testClass.ValueTypeProperty = 418; 26 | var fourOneEightValue = valueTypePropertyValueGetter(testClass); 27 | Assert.IsType(fourOneEightValue); 28 | Assert.Equal(418, fourOneEightValue); 29 | } 30 | 31 | [Fact] 32 | public void Can_Get_Correct_ReferenceType_Property_Value() 33 | { 34 | var referenceTypeProperty = typeof(MyTestClass).GetProperty("ReferenceTypeProperty", BindingFlags.Public | BindingFlags.Instance); 35 | Assert.NotNull(referenceTypeProperty); 36 | var referenceTypePropertyValueGetter = referenceTypeProperty.GetValueGetter(); 37 | Assert.NotNull(referenceTypePropertyValueGetter); 38 | 39 | var testClass = new MyTestClass(); 40 | 41 | var nullValue = referenceTypePropertyValueGetter(testClass); 42 | Assert.Null(nullValue); 43 | 44 | testClass.ReferenceTypeProperty = "418"; 45 | var fourOneEightValue = referenceTypePropertyValueGetter(testClass); 46 | Assert.IsType(fourOneEightValue); 47 | Assert.Equal("418", fourOneEightValue); 48 | } 49 | 50 | [Fact] 51 | public void 
Can_Get_Correct_NullableType_PropertyValue() 52 | { 53 | var nullableTypeProperty = typeof(MyTestClass).GetProperty("NullableTypeProperty", BindingFlags.Public | BindingFlags.Instance); 54 | Assert.NotNull(nullableTypeProperty); 55 | var nullableTypePropertyValueGetter = nullableTypeProperty.GetValueGetter(); 56 | Assert.NotNull(nullableTypePropertyValueGetter); 57 | 58 | var testClass = new MyTestClass(); 59 | 60 | var nullValue = nullableTypePropertyValueGetter(testClass); 61 | Assert.Null(nullValue); 62 | 63 | testClass.NullableTypeProperty = 418; 64 | var fourOneEightValue = nullableTypePropertyValueGetter(testClass); 65 | Assert.Equal(typeof(int), fourOneEightValue.GetType()); 66 | Assert.Equal(418, fourOneEightValue); 67 | } 68 | 69 | [Fact] 70 | public void Value_Getter_Is_Cached() 71 | { 72 | var valueTypeProperty = typeof(MyTestClass).GetProperty("ValueTypeProperty", BindingFlags.Public | BindingFlags.Instance); 73 | Assert.NotNull(valueTypeProperty); 74 | 75 | var valueTypePropertyValueGetter1 = valueTypeProperty.GetValueGetter(); 76 | Assert.NotNull(valueTypePropertyValueGetter1); 77 | 78 | var valueTypePropertyValueGetter2 = valueTypeProperty.GetValueGetter(); 79 | Assert.NotNull(valueTypePropertyValueGetter2); 80 | 81 | Assert.Same(valueTypePropertyValueGetter1, valueTypePropertyValueGetter2); 82 | } 83 | 84 | public class MyTestClass 85 | { 86 | public int ValueTypeProperty { get; set; } 87 | 88 | public string ReferenceTypeProperty { get; set; } 89 | 90 | public int? 
NullableTypeProperty { get; set; } 91 | } 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /src/BulkWriter.Benchmark/DbHelpers.cs: -------------------------------------------------------------------------------- 1 | using System.ComponentModel.DataAnnotations.Schema; 2 | using System.Reflection; 3 | using Microsoft.Data.SqlClient; 4 | 5 | namespace BulkWriter.Benchmark 6 | { 7 | [System.Diagnostics.CodeAnalysis.SuppressMessage("Security", "CA2100:Review SQL queries for security vulnerabilities", Justification = "No user input here")] 8 | internal static class DbHelpers 9 | { 10 | private const string SetupConnectionString = "Data Source=.\\;Trusted_Connection=True;"; 11 | 12 | private const string ConnectionString = "Data Source=.\\;Trusted_Connection=True;Initial Catalog=BulkWriter.Benchmark"; 13 | private const string DbName = "BulkWriter.Benchmark"; 14 | 15 | internal static SqlConnection OpenSqlConnection() 16 | { 17 | var connection = new SqlConnection(ConnectionString); 18 | connection.Open(); 19 | 20 | return connection; 21 | } 22 | 23 | internal static void SetupDb() 24 | { 25 | using var sqlConnection = new SqlConnection(SetupConnectionString); 26 | sqlConnection.Open(); 27 | 28 | CreateDatabase(sqlConnection); 29 | SetSimpleRecovery(sqlConnection); 30 | 31 | DropTable(sqlConnection); 32 | CreateDomainEntitiesTable(sqlConnection); 33 | } 34 | 35 | internal static void CreateDatabase(SqlConnection sqlConnection) 36 | { 37 | var createDatabaseSql = @$"IF NOT EXISTS (SELECT name FROM master.dbo.sysdatabases WHERE name = N'{DbName}') 38 | CREATE DATABASE [{DbName}]"; 39 | using var command = new SqlCommand(createDatabaseSql, sqlConnection); 40 | command.ExecuteNonQuery(); 41 | 42 | SetDatabaseSize(sqlConnection); 43 | } 44 | 45 | internal static void SetSimpleRecovery(SqlConnection sqlConnection) 46 | { 47 | var alterDatabaseSql = $"ALTER DATABASE [{DbName}] SET RECOVERY SIMPLE"; 48 | using var command = new 
SqlCommand(alterDatabaseSql, sqlConnection); 49 | command.ExecuteNonQuery(); 50 | } 51 | 52 | internal static void SetDatabaseSize(SqlConnection sqlConnection) 53 | { 54 | var alterDatabaseSql = @$"IF NOT EXISTS(SELECT name FROM [{DbName}].sys.database_files WHERE [type]=0 AND size > 100000) 55 | ALTER DATABASE [{DbName}] MODIFY FILE (Name='{DbName}', SIZE = 1000MB)"; 56 | using var command = new SqlCommand(alterDatabaseSql, sqlConnection); 57 | command.ExecuteNonQuery(); 58 | } 59 | 60 | internal static void CreateDomainEntitiesTable(SqlConnection sqlConnection) 61 | { 62 | var tableName = GetTableName(); 63 | var createTableSql = @$"USE [{DbName}]; CREATE TABLE dbo.{tableName} ( 64 | [Id] [bigint] NOT NULL, 65 | [FirstName] [nvarchar](100), 66 | [LastName] [nvarchar](100), 67 | CONSTRAINT [PK_{1}] PRIMARY KEY CLUSTERED ([Id] ASC ) 68 | )"; 69 | 70 | using var command = new SqlCommand(createTableSql, sqlConnection); 71 | command.ExecuteNonQuery(); 72 | } 73 | 74 | internal static void DropTable(SqlConnection sqlConnection) 75 | { 76 | var tableName = GetTableName(); 77 | var dropTableSql = $"USE [{DbName}]; DROP TABLE IF EXISTS {tableName}"; 78 | using var command = new SqlCommand(dropTableSql, sqlConnection); 79 | command.ExecuteNonQuery(); 80 | } 81 | 82 | internal static void TruncateTable(SqlConnection sqlConnection) 83 | { 84 | var tableName = GetTableName(); 85 | var dropTableSql = $"USE [{DbName}]; TRUNCATE TABLE {tableName}"; 86 | using var command = new SqlCommand(dropTableSql, sqlConnection); 87 | command.ExecuteNonQuery(); 88 | } 89 | 90 | internal static string GetTableName() 91 | { 92 | var t = typeof(TEntity); 93 | var tableNameAttribute = t.GetCustomAttribute(); 94 | return tableNameAttribute != null ? 
tableNameAttribute.Name : t.Name; 95 | } 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /src/BulkWriter.Benchmark/Benchmarks/BulkWriterBenchmark.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | using System.Threading.Tasks; 3 | using BenchmarkDotNet.Attributes; 4 | using Microsoft.Data.SqlClient; 5 | using System.Linq; 6 | 7 | namespace BulkWriter.Benchmark.Benchmarks 8 | { 9 | public class BulkWriterBenchmark : BenchmarkBaseClass 10 | { 11 | [Benchmark] 12 | public async Task BulkWriter() 13 | { 14 | await using var sqlConnection = DbHelpers.OpenSqlConnection(); 15 | using var bulkWriter = new BulkWriter(sqlConnection) 16 | { 17 | BulkCopyTimeout = 0, 18 | BatchSize = 10000 19 | }; 20 | 21 | var items = GetTestRecords(); 22 | await bulkWriter.WriteToDatabaseAsync(items); 23 | } 24 | 25 | [Benchmark] 26 | public async Task BulkWriterAsyncEnumerable() 27 | { 28 | await using var sqlConnection = DbHelpers.OpenSqlConnection(); 29 | using var bulkWriter = new BulkWriter(sqlConnection) 30 | { 31 | BulkCopyTimeout = 0, 32 | BatchSize = 10000 33 | }; 34 | 35 | var items = GetTestRecords().ToAsyncEnumerable(); 36 | await bulkWriter.WriteToDatabaseAsync(items); 37 | } 38 | 39 | [Benchmark(Baseline = true)] 40 | public async Task OneRecordAtATime() 41 | { 42 | var tableName = DbHelpers.GetTableName(); 43 | var insertSql = $"INSERT INTO {tableName} (Id, FirstName, LastName) VALUES (@Id, @FirstName, @LastName)"; 44 | 45 | await using var sqlConnection = DbHelpers.OpenSqlConnection(); 46 | 47 | var records = GetTestRecords(); 48 | foreach (var domainEntity in records) 49 | { 50 | var sqlCommand = new SqlCommand(insertSql, sqlConnection); 51 | sqlCommand.Parameters.AddWithValue("@Id", domainEntity.Id); 52 | sqlCommand.Parameters.AddWithValue("@FirstName", domainEntity.FirstName); 53 | sqlCommand.Parameters.AddWithValue("@LastName", domainEntity.LastName); 54 | 55 | 
await sqlCommand.ExecuteNonQueryAsync(); 56 | } 57 | } 58 | 59 | [Benchmark] 60 | public async Task Batched100() 61 | { 62 | var tableName = DbHelpers.GetTableName(); 63 | var insertSql = $"INSERT INTO {tableName} (Id, FirstName, LastName) VALUES "; 64 | 65 | await using var sqlConnection = DbHelpers.OpenSqlConnection(); 66 | 67 | var batchSize = 100; 68 | var currentBatchSize = 0; 69 | var records = GetTestRecords(); 70 | 71 | var queryBuilder = new StringBuilder(insertSql); 72 | var sqlCommand = new SqlCommand("", sqlConnection); 73 | 74 | foreach (var record in records) 75 | { 76 | queryBuilder.Append(currentBatchSize == 0 77 | ? "(@p0, @p1, @p2)" 78 | : $",(@p{currentBatchSize * 3}, @p{currentBatchSize * 3 + 1}, @p{currentBatchSize * 3 + 2})"); 79 | 80 | sqlCommand.Parameters.AddWithValue($"@p{currentBatchSize * 3}", record.Id); 81 | sqlCommand.Parameters.AddWithValue($"@p{currentBatchSize * 3 + 1}", record.FirstName); 82 | sqlCommand.Parameters.AddWithValue($"@p{currentBatchSize * 3 + 2}", record.LastName); 83 | 84 | ++currentBatchSize; 85 | 86 | if (currentBatchSize == batchSize) 87 | { 88 | sqlCommand.CommandText = queryBuilder.ToString(); 89 | await sqlCommand.ExecuteNonQueryAsync(); 90 | 91 | currentBatchSize = 0; 92 | 93 | queryBuilder.Clear(); 94 | queryBuilder.Append(insertSql); 95 | 96 | sqlCommand.CommandText = ""; 97 | sqlCommand.Parameters.Clear(); 98 | } 99 | } 100 | 101 | if (currentBatchSize > 0) 102 | { 103 | sqlCommand.CommandText = queryBuilder.ToString(); 104 | await sqlCommand.ExecuteNonQueryAsync(); 105 | } 106 | } 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /src/BulkWriter.Tests/BulkWriterInitializationTests.cs: -------------------------------------------------------------------------------- 1 | using System.ComponentModel.DataAnnotations.Schema; 2 | using System.Linq; 3 | using System.Threading.Tasks; 4 | using Xunit; 5 | 6 | namespace BulkWriter.Tests 7 | { 8 | 
[Collection(nameof(DbContainerFixture))] 9 | public class BulkWriterInitializationTests 10 | { 11 | private readonly string _connectionString; 12 | 13 | public class BulkWriterInitializationTestsMyTestClass 14 | { 15 | public int Id { get; set; } 16 | 17 | public string Name { get; set; } 18 | } 19 | 20 | private readonly DbContainerFixture _fixture; 21 | 22 | public BulkWriterInitializationTests(DbContainerFixture fixture) 23 | { 24 | _fixture = fixture; 25 | _connectionString = fixture.TestConnectionString; 26 | } 27 | 28 | [Table("TestClass2")] 29 | public class BulkWriterInitializationTestsMyTestClassAnnotation 30 | { 31 | public int Id { get; set; } 32 | 33 | public string Name { get; set; } 34 | } 35 | 36 | [Fact] 37 | public async Task CanSetBulkCopyParameters() 38 | { 39 | string tableName = DropCreate(nameof(BulkWriterInitializationTestsMyTestClass)); 40 | 41 | const int timeout = 10; 42 | const int batchSize = 1000; 43 | bool setupCallbackInvoked = false; 44 | 45 | var writer = new BulkWriter(_connectionString) 46 | { 47 | BulkCopyTimeout = timeout, 48 | BatchSize = batchSize, 49 | BulkCopySetup = bcp => 50 | { 51 | setupCallbackInvoked = true; 52 | Assert.Equal(nameof(BulkWriterInitializationTestsMyTestClass), bcp.DestinationTableName); 53 | Assert.Equal(timeout, bcp.BulkCopyTimeout); 54 | Assert.Equal(batchSize, bcp.BatchSize); 55 | } 56 | }; 57 | 58 | var items = Enumerable.Range(1, 10) 59 | .Select(i => new BulkWriterInitializationTestsMyTestClass { Id = i, Name = "Bob" }); 60 | 61 | writer.WriteToDatabase(items); 62 | 63 | var count = (int)await _fixture.ExecuteScalar(_connectionString, $"SELECT COUNT(1) FROM {tableName}"); 64 | 65 | Assert.Equal(10, count); 66 | Assert.True(setupCallbackInvoked); 67 | } 68 | 69 | [Fact] 70 | public async Task CanSetBulkCopyParameters_Respects_Table_Annotation() 71 | { 72 | string tableName = DropCreate("TestClass2"); 73 | const int timeout = 10; 74 | const int batchSize = 1000; 75 | bool setupCallbackInvoked = 
false; 76 | 77 | var writer = new BulkWriter(_connectionString) 78 | { 79 | BulkCopyTimeout = timeout, 80 | BatchSize = batchSize, 81 | BulkCopySetup = bcp => 82 | { 83 | setupCallbackInvoked = true; 84 | Assert.Equal("TestClass2", bcp.DestinationTableName); 85 | Assert.Equal(timeout, bcp.BulkCopyTimeout); 86 | Assert.Equal(batchSize, bcp.BatchSize); 87 | } 88 | }; 89 | 90 | var items = Enumerable.Range(1, 10) 91 | .Select(i => new BulkWriterInitializationTestsMyTestClassAnnotation { Id = i, Name = "Bob" }); 92 | 93 | writer.WriteToDatabase(items); 94 | 95 | var count = (int)await _fixture.ExecuteScalar(_connectionString, $"SELECT COUNT(1) FROM {tableName }"); 96 | 97 | Assert.Equal(10, count); 98 | Assert.True(setupCallbackInvoked); 99 | } 100 | 101 | private string DropCreate(string tableName) 102 | { 103 | _fixture.ExecuteNonQuery(_connectionString, $"DROP TABLE IF EXISTS [dbo].[{tableName}]"); 104 | 105 | _fixture.ExecuteNonQuery(_connectionString, 106 | "CREATE TABLE [dbo].[" + tableName + "](" + 107 | "[Id] [int] IDENTITY(1,1) NOT NULL," + 108 | "[Name] [nvarchar](50) NULL," + 109 | "CONSTRAINT [PK_" + tableName + "] PRIMARY KEY CLUSTERED ([Id] ASC)" + 110 | ")"); 111 | 112 | return tableName; 113 | } 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /src/BulkWriter/Pipeline/Steps/IEtlPipelineStep.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using BulkWriter.Pipeline.Transforms; 4 | using Microsoft.Extensions.Logging; 5 | 6 | namespace BulkWriter.Pipeline.Steps 7 | { 8 | /// 9 | /// Fluent configuration interface for an EtlPipeline 10 | /// 11 | /// Type of input objects to this pipeline step 12 | /// Type of output objects to this pipeline step 13 | public interface IEtlPipelineStep 14 | { 15 | /// 16 | /// Configures an aggregation step in the pipeline 17 | /// 18 | /// Type of the next output object in the 
pipeline 19 | /// Object that will perform the aggregation of multiple input objects to a single output 20 | /// Next step in the pipeline to be configured 21 | IEtlPipelineStep Aggregate(IAggregator aggregator); 22 | 23 | /// 24 | /// Configures an aggregation step in the pipeline 25 | /// 26 | /// Type of the next output object in the pipeline 27 | /// Func that performs the aggregation of multiple input objects to a single output 28 | /// Next step in the pipeline to be configured 29 | IEtlPipelineStep Aggregate(Func, TNextOut> aggregationFunc); 30 | 31 | /// 32 | /// Configures a pivot step in the pipeline 33 | /// 34 | /// Type of the next output object in the pipeline 35 | /// Object that will pivot each input object to multiple output objects 36 | /// Next step in the pipeline to be configured 37 | IEtlPipelineStep Pivot(IPivot pivot); 38 | 39 | /// 40 | /// Configures a pivot step in the pipeline 41 | /// 42 | /// Type of the next output object in the pipeline 43 | /// Func that will pivot each input object to multiple output objects 44 | /// Next step in the pipeline to be configured 45 | IEtlPipelineStep Pivot(Func> pivotFunc); 46 | 47 | /// 48 | /// Configures a projection step in the pipeline 49 | /// 50 | /// Type of the next output object in the pipeline 51 | /// Object that will project an input object to a new output type 52 | /// Next step in the pipeline to be configured 53 | IEtlPipelineStep Project(IProjector projector); 54 | 55 | /// 56 | /// Configures a projection step in the pipeline 57 | /// 58 | /// Type of the next output object in the pipeline 59 | /// Func that will project an input object to a new output type 60 | /// Next step in the pipeline to be configured 61 | IEtlPipelineStep Project(Func projectionFunc); 62 | 63 | /// 64 | /// Configures a transform step in the pipeline 65 | /// 66 | /// One or more objects that will transform input objects in place 67 | /// Next step in the pipeline to be configured 68 | IEtlPipelineStep 
TransformInPlace(params ITransformer[] transformers); 69 | 70 | /// 71 | /// Configures a transform step in the pipeline 72 | /// 73 | /// One or more actions that will transform input objects in place 74 | /// Next step in the pipeline to be configured 75 | IEtlPipelineStep TransformInPlace(params Action[] transformActions); 76 | 77 | /// 78 | /// Enables logging of the ETL pipeline internals 79 | /// 80 | /// Factory used to create a new ILogger 81 | /// Current step in the pipeline 82 | IEtlPipelineStep LogWith(ILoggerFactory loggerFactory); 83 | 84 | /// 85 | /// Configures the pipeline to write its output to a BulkWriter object; finalizes the pipeline. 86 | /// 87 | /// The BulkWriter object to which the pipeline will output 88 | /// A pipeline object that can be executed 89 | IEtlPipeline WriteTo(IBulkWriter bulkWriter); 90 | } 91 | } -------------------------------------------------------------------------------- /src/BulkWriter/BulkWriter.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.ComponentModel.DataAnnotations.Schema; 4 | using Microsoft.Data.SqlClient; 5 | using System.Linq; 6 | using System.Reflection; 7 | using System.Threading.Tasks; 8 | using BulkWriter.Internal; 9 | using System.Threading; 10 | 11 | namespace BulkWriter 12 | { 13 | /// 14 | /// Implements an object that takes an enumerable of and writes them to a target database via SqlBulkCopy 15 | /// 16 | /// Type of objects to be written to the database 17 | /// 18 | public sealed class BulkWriter : IBulkWriter 19 | { 20 | private readonly SqlBulkCopy _sqlBulkCopy; 21 | private readonly IEnumerable _propertyMappings; 22 | 23 | public BulkWriter(string connectionString) 24 | { 25 | _propertyMappings = typeof(TResult).BuildMappings(); 26 | _sqlBulkCopy = Initialize(options => new SqlBulkCopy(connectionString, options), null); 27 | } 28 | 29 | public BulkWriter(string connectionString, 
SqlBulkCopyOptions options) 30 | { 31 | _propertyMappings = typeof(TResult).BuildMappings(); 32 | _sqlBulkCopy = Initialize(sbcOpts => new SqlBulkCopy(connectionString, sbcOpts), options); 33 | } 34 | 35 | public BulkWriter(SqlConnection connection, SqlTransaction transaction = null) 36 | { 37 | _propertyMappings = typeof(TResult).BuildMappings(); 38 | _sqlBulkCopy = Initialize(options => new SqlBulkCopy(connection, options, transaction), null); 39 | } 40 | 41 | public BulkWriter(SqlConnection connection, SqlBulkCopyOptions options, SqlTransaction transaction = null) 42 | { 43 | _propertyMappings = typeof(TResult).BuildMappings(); 44 | _sqlBulkCopy = Initialize(sbcOpts => new SqlBulkCopy(connection, sbcOpts, transaction), options); 45 | } 46 | 47 | private SqlBulkCopy Initialize(Func createBulkCopy, SqlBulkCopyOptions? options) 48 | { 49 | SqlBulkCopyOptions sqlBulkCopyOptions; 50 | 51 | if (options == null) 52 | { 53 | var hasAnyKeys = _propertyMappings.Any(x => x.Destination.IsKey); 54 | sqlBulkCopyOptions = (hasAnyKeys ? SqlBulkCopyOptions.KeepIdentity : SqlBulkCopyOptions.Default) 55 | | SqlBulkCopyOptions.TableLock; 56 | } 57 | else 58 | { 59 | sqlBulkCopyOptions = options.Value; 60 | } 61 | 62 | var tableAttribute = typeof(TResult).GetTypeInfo().GetCustomAttribute(); 63 | var schemaName = tableAttribute?.Schema; 64 | var tableName = tableAttribute?.Name ?? typeof(TResult).Name; 65 | var destinationTableName = schemaName != null ? $"{schemaName}.{tableName}" : tableName; 66 | 67 | var sqlBulkCopy = createBulkCopy(sqlBulkCopyOptions); 68 | 69 | sqlBulkCopy.DestinationTableName = destinationTableName; 70 | sqlBulkCopy.EnableStreaming = true; 71 | sqlBulkCopy.BulkCopyTimeout = 0; 72 | 73 | foreach (var propertyMapping in _propertyMappings.Where(propertyMapping => propertyMapping.ShouldMap)) 74 | { 75 | sqlBulkCopy.ColumnMappings.Add(propertyMapping.ToColumnMapping()); 76 | } 77 | 78 | return sqlBulkCopy; 79 | } 80 | 81 | /// 82 | /// Number of rows in each batch. 
At the end of each batch, the rows in the batch are sent to the server. 83 | /// 84 | public int BatchSize 85 | { 86 | get => _sqlBulkCopy.BatchSize; 87 | set => _sqlBulkCopy.BatchSize = value; 88 | } 89 | 90 | /// 91 | /// Number of seconds for the BulkCopy operation to complete before it times out 92 | /// 93 | public int BulkCopyTimeout 94 | { 95 | get => _sqlBulkCopy.BulkCopyTimeout; 96 | set => _sqlBulkCopy.BulkCopyTimeout = value; 97 | } 98 | 99 | /// 100 | /// Action to allow custom configuration of the SqlBulkCopy object prior to the start of the operation. 101 | /// 102 | public Action BulkCopySetup { get; set; } = sbc => { }; 103 | 104 | public void WriteToDatabase(IEnumerable items) 105 | { 106 | BulkCopySetup(_sqlBulkCopy); 107 | 108 | using (var dataReader = new EnumerableDataReader(items, _propertyMappings)) 109 | { 110 | _sqlBulkCopy.WriteToServer(dataReader); 111 | } 112 | } 113 | 114 | public async Task WriteToDatabaseAsync(IEnumerable items, CancellationToken cancellationToken = default) 115 | { 116 | BulkCopySetup(_sqlBulkCopy); 117 | 118 | using (var dataReader = new EnumerableDataReader(items, _propertyMappings)) 119 | { 120 | await _sqlBulkCopy.WriteToServerAsync(dataReader, cancellationToken); 121 | } 122 | } 123 | 124 | #if NETSTANDARD2_1_OR_GREATER || NET6_0_OR_GREATER 125 | public async Task WriteToDatabaseAsync(IAsyncEnumerable items, CancellationToken cancellationToken = default) 126 | { 127 | BulkCopySetup(_sqlBulkCopy); 128 | 129 | await using var dataReader = new AsyncEnumerableDataReader(items, _propertyMappings); 130 | 131 | await _sqlBulkCopy.WriteToServerAsync(dataReader, cancellationToken); 132 | } 133 | #endif 134 | 135 | public void Dispose() => ((IDisposable)_sqlBulkCopy).Dispose(); 136 | } 137 | } 138 | -------------------------------------------------------------------------------- /src/BulkWriter/Pipeline/Internal/EtlPipelineStep.cs: -------------------------------------------------------------------------------- 1 | using 
System; 2 | using System.Collections.Concurrent; 3 | using System.Collections.Generic; 4 | using System.Linq; 5 | using System.Threading; 6 | using BulkWriter.Pipeline.Steps; 7 | using BulkWriter.Pipeline.Transforms; 8 | using Microsoft.Extensions.Logging; 9 | using System.Threading.Tasks; 10 | 11 | namespace BulkWriter.Pipeline.Internal 12 | { 13 | internal abstract class EtlPipelineStepBase 14 | { 15 | protected EtlPipelineStepBase(EtlPipelineContext pipelineContext, int stepNumber) 16 | { 17 | PipelineContext = pipelineContext; 18 | OutputCollection = new BlockingCollection(); 19 | StepNumber = stepNumber; 20 | } 21 | 22 | internal readonly EtlPipelineContext PipelineContext; 23 | internal readonly BlockingCollection OutputCollection; 24 | 25 | public int StepNumber { get; protected set; } 26 | } 27 | 28 | internal abstract class EtlPipelineStep : EtlPipelineStepBase, IEtlPipelineStep, IEtlPipelineStep 29 | { 30 | internal readonly BlockingCollection InputCollection; 31 | 32 | protected EtlPipelineStep(EtlPipelineContext pipelineContext) : base(pipelineContext, 1) 33 | { 34 | InputCollection = new BlockingCollection(); 35 | } 36 | 37 | protected EtlPipelineStep(EtlPipelineStepBase previousStep) : base(previousStep.PipelineContext, previousStep.StepNumber + 1) 38 | { 39 | InputCollection = previousStep.OutputCollection; 40 | } 41 | 42 | public IEtlPipelineStep Aggregate(IAggregator aggregator) 43 | { 44 | if (aggregator == null) throw new ArgumentNullException(nameof(aggregator)); 45 | return Aggregate(aggregator.Aggregate); 46 | } 47 | 48 | public IEtlPipelineStep Aggregate(Func, TNextOut> aggregationFunc) 49 | { 50 | if (aggregationFunc == null) throw new ArgumentNullException(nameof(aggregationFunc)); 51 | 52 | var step = new AggregateEtlPipelineStep(this, aggregationFunc); 53 | PipelineContext.AddStep(step); 54 | 55 | return step; 56 | } 57 | 58 | public IEtlPipelineStep Pivot(IPivot pivot) 59 | { 60 | if (pivot == null) throw new 
ArgumentNullException(nameof(pivot)); 61 | return Pivot(pivot.Pivot); 62 | } 63 | 64 | public IEtlPipelineStep Pivot(Func> pivotFunc) 65 | { 66 | if (pivotFunc == null) throw new ArgumentNullException(nameof(pivotFunc)); 67 | 68 | var step = new PivotEtlPipelineStep(this, pivotFunc); 69 | PipelineContext.AddStep(step); 70 | 71 | return step; 72 | } 73 | 74 | public IEtlPipelineStep Project(IProjector projector) 75 | { 76 | if (projector == null) throw new ArgumentNullException(nameof(projector)); 77 | return Project(projector.ProjectTo); 78 | } 79 | 80 | public IEtlPipelineStep Project(Func projectionFunc) 81 | { 82 | if (projectionFunc == null) throw new ArgumentNullException(nameof(projectionFunc)); 83 | 84 | var step = new ProjectEtlPipelineStep(this, projectionFunc); 85 | PipelineContext.AddStep(step); 86 | 87 | return step; 88 | } 89 | 90 | public IEtlPipelineStep TransformInPlace(params ITransformer[] transformers) 91 | { 92 | if (transformers == null || transformers.Any(t => t == null)) throw new ArgumentNullException(nameof(transformers), @"No transformer may be null"); 93 | return TransformInPlace(transformers.Select(t => (Action)t.Transform).ToArray()); 94 | } 95 | 96 | public IEtlPipelineStep TransformInPlace(params Action[] transformActions) 97 | { 98 | if (transformActions == null || transformActions.Any(t => t == null)) throw new ArgumentNullException(nameof(transformActions), @"No transformer may be null"); 99 | 100 | var step = new TransformEtlPipelineStep(this, transformActions); 101 | PipelineContext.AddStep(step); 102 | 103 | return step; 104 | } 105 | 106 | public IEtlPipelineStep LogWith(ILoggerFactory loggerFactory) 107 | { 108 | PipelineContext.LoggerFactory = loggerFactory; 109 | return this; 110 | } 111 | 112 | public IEtlPipeline WriteTo(IBulkWriter bulkWriter) 113 | { 114 | var step = new BulkWriterEtlPipelineStep(this, bulkWriter); 115 | PipelineContext.AddStep(step); 116 | 117 | return PipelineContext.Pipeline; 118 | } 119 | 120 | 
protected abstract Task RunCore(CancellationToken cancellationToken); 121 | 122 | public async Task Run(CancellationToken cancellationToken) 123 | { 124 | try 125 | { 126 | var logger = PipelineContext.LoggerFactory?.CreateLogger(GetType()); 127 | 128 | try 129 | { 130 | logger?.LogInformation($"Starting pipeline step {StepNumber} of {PipelineContext.TotalSteps}"); 131 | 132 | await RunCore(cancellationToken); 133 | 134 | logger?.LogInformation($"Completing pipeline step {StepNumber} of {PipelineContext.TotalSteps}"); 135 | } 136 | 137 | catch (Exception e) 138 | { 139 | logger?.LogError(e, $"Error while running pipeline step {StepNumber} of {PipelineContext.TotalSteps}"); 140 | throw; 141 | } 142 | } 143 | 144 | finally 145 | { 146 | //This statement is in place to ensure that no matter what, the output collection 147 | //will be marked "complete". Without this, an exception in the try block above can 148 | //lead to a stalled (i.e. non-terminating) pipeline because this thread's consumer 149 | //is waiting for more output from this thread, which will never happen because the 150 | //thread is now dead. This should also ensure we get at least partial output in case 151 | //of an exception. 
152 | OutputCollection.CompleteAdding(); 153 | } 154 | } 155 | } 156 | } 157 | -------------------------------------------------------------------------------- /src/BulkWriter/Internal/EnumerableDataReader.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections; 3 | using System.Collections.Generic; 4 | using System.Data.Common; 5 | using System.Linq; 6 | using System.Runtime.CompilerServices; 7 | using BulkWriter.Properties; 8 | 9 | [assembly: InternalsVisibleTo("BulkWriter.Tests")] 10 | namespace BulkWriter.Internal 11 | { 12 | internal abstract class EnumerableDataReaderBase : DbDataReader 13 | { 14 | private readonly PropertyMapping[] _propertyMappings; 15 | private readonly Dictionary _nameToOrdinalMappings; 16 | private readonly Dictionary _ordinalToPropertyMappings; 17 | 18 | protected EnumerableDataReaderBase(IEnumerable propertyMappings) 19 | { 20 | _propertyMappings = propertyMappings?.OrderBy(x => x.Source.Ordinal).ToArray() ?? throw new ArgumentNullException(nameof(propertyMappings)); 21 | 22 | // Map the source entity's positional ordinals to the source/destination property mapping. 23 | _ordinalToPropertyMappings = _propertyMappings.ToDictionary(x => x.Source.Ordinal); 24 | 25 | // Map the destination table's ordinals to the source/destination property mapping, 26 | // using the source property's name as the key. 
27 | _nameToOrdinalMappings = _propertyMappings.ToDictionary(x => x.Source.Property.Name, x => x.Destination.ColumnOrdinal); 28 | 29 | } 30 | 31 | public abstract TResult Current { get; } 32 | 33 | protected abstract void EnsureNotDisposed(); 34 | 35 | public override int FieldCount 36 | { 37 | get 38 | { 39 | EnsureNotDisposed(); 40 | return _propertyMappings.Length; 41 | } 42 | } 43 | 44 | public override bool HasRows => throw new NotSupportedException(); 45 | public override int Depth => throw new NotSupportedException(); 46 | public override bool IsClosed => throw new NotSupportedException(); 47 | public override int RecordsAffected => throw new NotSupportedException(); 48 | 49 | public override int GetOrdinal(string name) 50 | { 51 | EnsureNotDisposed(); 52 | 53 | if (!_nameToOrdinalMappings.TryGetValue(name, out int ordinal)) 54 | { 55 | throw new InvalidOperationException(Resources.EnumerableDataReader_GetOrdinal_NameDoesNotMapToOrdinal); 56 | } 57 | 58 | return ordinal; 59 | } 60 | 61 | public override bool IsDBNull(int i) 62 | { 63 | EnsureNotDisposed(); 64 | 65 | var value = GetValue(i); 66 | return null == value; 67 | } 68 | 69 | public override object GetValue(int i) 70 | { 71 | EnsureNotDisposed(); 72 | 73 | if (!_ordinalToPropertyMappings.TryGetValue(i, out PropertyMapping mapping)) 74 | { 75 | throw new InvalidOperationException(Resources.EnumerableDataReader_GetValue_OrdinalDoesNotMapToProperty); 76 | } 77 | 78 | var valueGetter = mapping.Source.Property.GetValueGetter(); 79 | 80 | var value = valueGetter(Current); 81 | return value; 82 | } 83 | 84 | public override string GetString(int i) 85 | { 86 | EnsureNotDisposed(); 87 | 88 | if (!_ordinalToPropertyMappings.TryGetValue(i, out PropertyMapping mapping)) 89 | { 90 | throw new InvalidOperationException(Resources.EnumerableDataReader_GetString_OrdinalDoesNotMapToProperty); 91 | } 92 | 93 | var valueGetter = mapping.Source.Property.GetValueGetter(); 94 | 95 | var value = valueGetter(Current); 96 | 
return value?.ToString(); 97 | } 98 | 99 | public override long GetBytes(int i, long fieldOffset, byte[] buffer, int bufferoffset, int length) 100 | { 101 | EnsureNotDisposed(); 102 | 103 | if (!_ordinalToPropertyMappings.TryGetValue(i, out PropertyMapping mapping)) 104 | { 105 | throw new InvalidOperationException(Resources.EnumerableDataReader_GetBytes_OrdinalDoesNotMapToProperty); 106 | } 107 | 108 | var valueGetter = mapping.Source.Property.GetValueGetter(); 109 | if (valueGetter(Current) is byte[] value) 110 | { 111 | var pos = Math.Max(fieldOffset, fieldOffset / buffer.Length * buffer.Length); 112 | var rest = value.Length - pos; 113 | var count = Math.Min(rest, buffer.Length); 114 | Buffer.BlockCopy(value, (int)fieldOffset, buffer, bufferoffset, (int)count); 115 | return count; 116 | } 117 | 118 | return 0; 119 | } 120 | 121 | public override string GetName(int i) 122 | { 123 | EnsureNotDisposed(); 124 | 125 | if (!_ordinalToPropertyMappings.TryGetValue(i, out PropertyMapping mapping)) 126 | { 127 | throw new InvalidOperationException(Resources.EnumerableDataReader_GetName_OrdinalDoesNotMapToName); 128 | } 129 | 130 | var name = mapping.Source.Property.Name; 131 | return name; 132 | } 133 | 134 | public override string GetDataTypeName(int i) => throw new NotSupportedException(); 135 | public override IEnumerator GetEnumerator() => throw new NotImplementedException(); 136 | public override Type GetFieldType(int i) => throw new NotSupportedException(); 137 | public override int GetValues(object[] values) => throw new NotSupportedException(); 138 | public override bool GetBoolean(int i) => throw new NotSupportedException(); 139 | public override byte GetByte(int i) => throw new NotSupportedException(); 140 | public override char GetChar(int i) => throw new NotSupportedException(); 141 | public override long GetChars(int i, long fieldoffset, char[] buffer, int bufferoffset, int length) => throw new NotSupportedException(); 142 | public override Guid GetGuid(int 
/// <summary>
/// An <see cref="EnumerableDataReaderBase{TResult}"/> that streams rows from a
/// synchronous <see cref="IEnumerable{T}"/> source, creating its enumerator
/// lazily on the first call to <see cref="Read"/>.
/// </summary>
internal class EnumerableDataReader<TResult> : EnumerableDataReaderBase<TResult>
{
    private readonly IEnumerable<TResult> _items;

    private IEnumerator<TResult> _enumerator;
    private bool _disposed;

    public EnumerableDataReader(IEnumerable<TResult> items, IEnumerable<PropertyMapping> propertyMappings)
        : base(propertyMappings)
    {
        _items = items ?? throw new ArgumentNullException(nameof(items));
    }

    /// <summary>
    /// The element the reader is currently positioned on, or the default value of
    /// <typeparamref name="TResult"/> before the first call to <see cref="Read"/>.
    /// </summary>
    public override TResult Current
    {
        get
        {
            EnsureNotDisposed();

            return _enumerator == null ? default(TResult) : _enumerator.Current;
        }
    }

    /// <summary>
    /// Advances the reader to the next element; returns false when the source is exhausted.
    /// </summary>
    public override bool Read()
    {
        EnsureNotDisposed();

        if (_enumerator == null)
        {
            _enumerator = _items.GetEnumerator();
        }

        return _enumerator.MoveNext();
    }

    protected override void Dispose(bool disposing)
    {
        base.Dispose(disposing);

        if (!disposing)
        {
            return;
        }

        _enumerator?.Dispose();
        _enumerator = null;
        _disposed = true;
    }

    /// <summary>
    /// Throws if this reader has already been disposed.
    /// </summary>
    protected override void EnsureNotDisposed()
    {
        if (_disposed)
        {
            throw new ObjectDisposedException("EnumerableDataReader");
        }
    }
}
preferences 25 | dotnet_style_qualification_for_event = false:silent 26 | dotnet_style_qualification_for_field = false:silent 27 | dotnet_style_qualification_for_method = false:silent 28 | dotnet_style_qualification_for_property = false:silent 29 | 30 | # Language keywords vs BCL types preferences 31 | dotnet_style_predefined_type_for_locals_parameters_members = true:silent 32 | dotnet_style_predefined_type_for_member_access = true:silent 33 | 34 | # Parentheses preferences 35 | dotnet_style_parentheses_in_arithmetic_binary_operators = always_for_clarity:silent 36 | dotnet_style_parentheses_in_other_binary_operators = always_for_clarity:silent 37 | dotnet_style_parentheses_in_other_operators = never_if_unnecessary:silent 38 | dotnet_style_parentheses_in_relational_binary_operators = always_for_clarity:silent 39 | 40 | # Modifier preferences 41 | dotnet_style_require_accessibility_modifiers = for_non_interface_members:silent 42 | 43 | # Expression-level preferences 44 | dotnet_style_coalesce_expression = true:suggestion 45 | dotnet_style_collection_initializer = true:suggestion 46 | dotnet_style_explicit_tuple_names = true:suggestion 47 | dotnet_style_null_propagation = true:suggestion 48 | dotnet_style_object_initializer = true:suggestion 49 | dotnet_style_prefer_auto_properties = true:silent 50 | dotnet_style_prefer_compound_assignment = true:suggestion 51 | dotnet_style_prefer_conditional_expression_over_assignment = true:silent 52 | dotnet_style_prefer_conditional_expression_over_return = true:silent 53 | dotnet_style_prefer_inferred_anonymous_type_member_names = true:suggestion 54 | dotnet_style_prefer_inferred_tuple_names = true:suggestion 55 | dotnet_style_prefer_is_null_check_over_reference_equality_method = true:suggestion 56 | dotnet_style_prefer_simplified_interpolation = true:suggestion 57 | 58 | # Field preferences 59 | dotnet_style_readonly_field = true:suggestion 60 | 61 | # Parameter preferences 62 | dotnet_code_quality_unused_parameters = 
all:suggestion 63 | 64 | #### C# Coding Conventions #### 65 | 66 | # var preferences 67 | csharp_style_var_elsewhere = false:silent 68 | csharp_style_var_for_built_in_types = false:silent 69 | csharp_style_var_when_type_is_apparent = false:silent 70 | 71 | # Expression-bodied members 72 | csharp_style_expression_bodied_accessors = true:silent 73 | csharp_style_expression_bodied_constructors = false:silent 74 | csharp_style_expression_bodied_indexers = true:silent 75 | csharp_style_expression_bodied_lambdas = true:silent 76 | csharp_style_expression_bodied_local_functions = false:silent 77 | csharp_style_expression_bodied_methods = false:silent 78 | csharp_style_expression_bodied_operators = false:silent 79 | csharp_style_expression_bodied_properties = true:silent 80 | 81 | # Pattern matching preferences 82 | csharp_style_pattern_matching_over_as_with_null_check = true:suggestion 83 | csharp_style_pattern_matching_over_is_with_cast_check = true:suggestion 84 | csharp_style_prefer_switch_expression = true:suggestion 85 | 86 | # Null-checking preferences 87 | csharp_style_conditional_delegate_call = true:suggestion 88 | 89 | # Modifier preferences 90 | csharp_prefer_static_local_function = true:suggestion 91 | csharp_preferred_modifier_order = public,private,protected,internal,static,extern,new,virtual,abstract,sealed,override,readonly,unsafe,volatile,async:silent 92 | 93 | # Code-block preferences 94 | csharp_prefer_braces = true:silent 95 | csharp_prefer_simple_using_statement = true:suggestion 96 | 97 | # Expression-level preferences 98 | csharp_prefer_simple_default_expression = true:suggestion 99 | csharp_style_deconstructed_variable_declaration = true:suggestion 100 | csharp_style_inlined_variable_declaration = true:suggestion 101 | csharp_style_pattern_local_over_anonymous_function = true:suggestion 102 | csharp_style_prefer_index_operator = true:suggestion 103 | csharp_style_prefer_range_operator = true:suggestion 104 | csharp_style_throw_expression = 
true:suggestion 105 | csharp_style_unused_value_assignment_preference = discard_variable:suggestion 106 | csharp_style_unused_value_expression_statement_preference = discard_variable:silent 107 | 108 | # 'using' directive preferences 109 | csharp_using_directive_placement = outside_namespace:silent 110 | 111 | #### C# Formatting Rules #### 112 | 113 | # New line preferences 114 | csharp_new_line_before_catch = true 115 | csharp_new_line_before_else = true 116 | csharp_new_line_before_finally = true 117 | csharp_new_line_before_members_in_anonymous_types = true 118 | csharp_new_line_before_members_in_object_initializers = true 119 | csharp_new_line_before_open_brace = all 120 | csharp_new_line_between_query_expression_clauses = true 121 | 122 | # Indentation preferences 123 | csharp_indent_block_contents = true 124 | csharp_indent_braces = false 125 | csharp_indent_case_contents = true 126 | csharp_indent_case_contents_when_block = true 127 | csharp_indent_labels = one_less_than_current 128 | csharp_indent_switch_labels = true 129 | 130 | # Space preferences 131 | csharp_space_after_cast = false 132 | csharp_space_after_colon_in_inheritance_clause = true 133 | csharp_space_after_comma = true 134 | csharp_space_after_dot = false 135 | csharp_space_after_keywords_in_control_flow_statements = true 136 | csharp_space_after_semicolon_in_for_statement = true 137 | csharp_space_around_binary_operators = before_and_after 138 | csharp_space_around_declaration_statements = false 139 | csharp_space_before_colon_in_inheritance_clause = true 140 | csharp_space_before_comma = false 141 | csharp_space_before_dot = false 142 | csharp_space_before_open_square_brackets = false 143 | csharp_space_before_semicolon_in_for_statement = false 144 | csharp_space_between_empty_square_brackets = false 145 | csharp_space_between_method_call_empty_parameter_list_parentheses = false 146 | csharp_space_between_method_call_name_and_opening_parenthesis = false 147 | 
csharp_space_between_method_call_parameter_list_parentheses = false 148 | csharp_space_between_method_declaration_empty_parameter_list_parentheses = false 149 | csharp_space_between_method_declaration_name_and_open_parenthesis = false 150 | csharp_space_between_method_declaration_parameter_list_parentheses = false 151 | csharp_space_between_parentheses = false 152 | csharp_space_between_square_brackets = false 153 | 154 | # Wrapping preferences 155 | csharp_preserve_single_line_blocks = true 156 | csharp_preserve_single_line_statements = true 157 | 158 | #### Naming styles #### 159 | 160 | # Naming rules 161 | dotnet_naming_rule.interface_should_be_begins_with_i.severity = suggestion 162 | dotnet_naming_rule.interface_should_be_begins_with_i.symbols = interface 163 | dotnet_naming_rule.interface_should_be_begins_with_i.style = begins_with_i 164 | 165 | dotnet_naming_rule.types_should_be_pascal_case.severity = suggestion 166 | dotnet_naming_rule.types_should_be_pascal_case.symbols = types 167 | dotnet_naming_rule.types_should_be_pascal_case.style = pascal_case 168 | 169 | dotnet_naming_rule.non_field_members_should_be_pascal_case.severity = suggestion 170 | dotnet_naming_rule.non_field_members_should_be_pascal_case.symbols = non_field_members 171 | dotnet_naming_rule.non_field_members_should_be_pascal_case.style = pascal_case 172 | 173 | # Symbol specifications 174 | dotnet_naming_symbols.interface.applicable_kinds = interface 175 | dotnet_naming_symbols.interface.applicable_accessibilities = public, internal, private, protected, protected_internal, private_protected 176 | dotnet_naming_symbols.interface.required_modifiers = 177 | 178 | dotnet_naming_symbols.types.applicable_kinds = class, struct, interface, enum 179 | dotnet_naming_symbols.types.applicable_accessibilities = public, internal, private, protected, protected_internal, private_protected 180 | dotnet_naming_symbols.types.required_modifiers = 181 | 182 | 
dotnet_naming_symbols.non_field_members.applicable_kinds = property, event, method 183 | dotnet_naming_symbols.non_field_members.applicable_accessibilities = public, internal, private, protected, protected_internal, private_protected 184 | dotnet_naming_symbols.non_field_members.required_modifiers = 185 | 186 | # Naming styles 187 | dotnet_naming_style.pascal_case.required_prefix = 188 | dotnet_naming_style.pascal_case.required_suffix = 189 | dotnet_naming_style.pascal_case.word_separator = 190 | dotnet_naming_style.pascal_case.capitalization = pascal_case 191 | 192 | dotnet_naming_style.begins_with_i.required_prefix = I 193 | dotnet_naming_style.begins_with_i.required_suffix = 194 | dotnet_naming_style.begins_with_i.word_separator = 195 | dotnet_naming_style.begins_with_i.capitalization = pascal_case -------------------------------------------------------------------------------- /docs/features/etlpipeline.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: EtlPipeline 4 | parent: Features 5 | nav_order: 3 6 | --- 7 | # EtlPipeline 8 | 9 | The `EtlPipeline` class exposes a fluent interface for configuring your processing pipeline. The pipeline configuration will look roughly like this: 10 | 11 | - [Start the Pipeline](#starting-the-pipeline-configuration) 12 | - Apply optional [transformations](#applying-transformations) 13 | - Apply optional [configuration](#configure-logging) 14 | - [Write to BulkWriter](#writing-to-bulkwriter) 15 | - [Execute the Pipeline](#execute-the-pipeline) 16 | - [Handle Errors](#handle-errors) 17 | 18 | See the [Advanced Pipelining example](../examples/advanced-pipelining.md) for a full pipeline implementation. 
19 | 20 | ## Starting the pipeline configuration 21 | 22 | Start pipeline configuration with a call to 23 | 24 | ```csharp 25 | IEtlPipelineStep StartWith(IEnumerable input) 26 | ``` 27 | 28 | ## Applying Transformations 29 | 30 | With transformations we can manipulate data prior to writing to the data store. 31 | 32 | ### Aggregate 33 | 34 | `Aggregate` takes multiple records and outputs a single record. 35 | 36 | ```csharp 37 | IEtlPipelineStep Aggregate(IAggregator aggregator); 38 | IEtlPipelineStep Aggregate(Func, TNextOut> aggregationFunc); 39 | ``` 40 | 41 | **Example:** 42 | 43 | ```csharp 44 | using (var writer = new BulkWriter(connectionString)) 45 | { 46 | var items = Enumerable.Range(1, 1000).Select(i => new MyClass { Id = i, Name = "Bob", WeightInKg = 82 }); 47 | var pipeline = EtlPipeline.StartWith(items) 48 | .Aggregate(f => f.Sum(c => c.WeightInKg)) 49 | .WriteTo(writer); 50 | 51 | await pipeline.ExecuteAsync(); 52 | } 53 | ``` 54 | 55 | **Output:** 56 | 57 | `82,000` 58 | 59 | ### Pivot 60 | 61 | `Pivot` turns one record into many. 62 | 63 | ```csharp 64 | IEtlPipelineStep Pivot(IPivot pivot); 65 | IEtlPipelineStep Pivot(Func> pivotFunc); 66 | ``` 67 | 68 | **Performance Note:** 69 | It's not possible to `yield return` from an anonymous method in C#. Since the `Pivot` method returns an `IEnumerable`, you'll almost certainly want to write a class that implements the `IPivot` interface rather than pass in a `Func`. Otherwise, you may lose the benefit of streaming records through the `IEnumerable` rather than creating them all in memory before your step can write to its output collection.
70 | 71 | **Example:** 72 | 73 | ```csharp 74 | public class BobFromIdPivot : IPivot 75 | { 76 | public IEnumerable Pivot(int i) 77 | { 78 | for (var j = 1; j <= i; j++) 79 | { 80 | yield return new MyEntity { Id = j, Name = $"Bob {j}" }; 81 | } 82 | } 83 | } 84 | 85 | using (var writer = new BulkWriter(connectionString)) 86 | { 87 | var idCounter = 0; 88 | var items = Enumerable.Range(1, 3).ToList(); 89 | var pipeline = EtlPipeline.StartWith(items) 90 | .Pivot(new BobFromIdPivot()) 91 | .WriteTo(writer); 92 | 93 | await pipeline.ExecuteAsync(); 94 | } 95 | ``` 96 | 97 | **Output:** 98 | 99 | | Id | Name | 100 | |:----|:------| 101 | | 1 | Bob 1 | 102 | | 2 | Bob 2 | 103 | | 3 | Bob 3 | 104 | | 4 | Bob 4 | 105 | | 5 | Bob 5 | 106 | | 6 | Bob 6 | 107 | 108 | ### Project 109 | 110 | `Project` can translate your current type into a new type. 111 | 112 | ```csharp 113 | IEtlPipelineStep Project(IProjector projector); 114 | IEtlPipelineStep Project(Func projectionFunc); 115 | ``` 116 | 117 | **Example:** 118 | 119 | ```csharp 120 | using (var writer = new BulkWriter(connectionString)) 121 | { 122 | var items = Enumerable.Range(1, 1000).Select(i => new MyOtherClass { Id = i, FirstName = "Bob", LastName = $"{i}" }); 123 | var pipeline = EtlPipeline 124 | .StartWith(items) 125 | .Project(i => new MyClass { Id = i.Id, Name = $"{i.FirstName} {i.LastName}" }) 126 | .WriteTo(writer); 127 | 128 | await pipeline.ExecuteAsync(); 129 | } 130 | ``` 131 | 132 | **Output:** 133 | 134 | | Id | Name | 135 | |:-----|:---------| 136 | | 1 | Bob 1 | 137 | | 2 | Bob 2 | 138 | | 3 | Bob 3 | 139 | | ... | ... | 140 | | 998 | Bob 998 | 141 | | 999 | Bob 999 | 142 | | 1000 | Bob 1000 | 143 | 144 | ### Transform 145 | 146 | `Transform` applies changes to objects in-place as they stream through. Multiple transforms may be applied in a single step, if desired.
147 | 148 | ```csharp 149 | IEtlPipelineStep TransformInPlace(params ITransformer[] transformers); 150 | IEtlPipelineStep TransformInPlace(params Action[] transformActions); 151 | ``` 152 | 153 | **Example:** 154 | 155 | ```csharp 156 | using (var writer = new BulkWriter(connectionString)) 157 | { 158 | var items = Enumerable.Range(1, 1000).Select(i => new MyClass { Id = i, Name = "Bob", WeightInKg = 80 }); 159 | var pipeline = EtlPipeline 160 | .StartWith(items) 161 | .TransformInPlace(i => 162 | { 163 | i.WeightInLbs = i.WeightInKg * 2.205; 164 | }) 165 | .WriteTo(writer); 166 | 167 | await pipeline.ExecuteAsync(); 168 | } 169 | ``` 170 | 171 | **Output:** 172 | 173 | | Id | Name | WeightInKg | WeightInLbs | 174 | |:-----|:-----|:-----------|-------------| 175 | | 1 | Bob | 80 | 176.4 | 176 | | 2 | Bob | 80 | 176.4 | 177 | | 3 | Bob | 80 | 176.4 | 178 | | ... | ... | ... | ... | 179 | | 998 | Bob | 80 | 176.4 | 180 | | 999 | Bob | 80 | 176.4 | 181 | | 1000 | Bob | 80 | 176.4 | 182 | 183 | ## Configure Logging 184 | 185 | Logging is configured via the `LogWith(ILoggerFactory)` method, where `ILoggerFactory` is from the `Microsoft.Extensions.Logging` library. 186 | 187 | ```csharp 188 | IEtlPipelineStep LogWith(ILoggerFactory loggerFactory); 189 | ``` 190 | 191 | This will log the start, stop and any exceptions thrown by each step in your pipeline. If you need logging inside the code you provide to actually transform your data, you should either capture a logger instance in each `Action` or `Func` passed to your pipeline config, or add logger instances inside your implementations of the transform interfaces. 
192 | 193 | ## Writing to BulkWriter 194 | 195 | Finish up pipeline configuration by calling `WriteTo` 196 | 197 | ```csharp 198 | IEtlPipeline WriteTo(IBulkWriter bulkWriter); 199 | ``` 200 | 201 | **Example:** 202 | 203 | ```csharp 204 | using (var writer = new BulkWriter(connectionString)) 205 | { 206 | var items = Enumerable.Range(1, 1000).Select(i => new MyClass { Id = i, Name = "Bob", WeightInKg = 80 }); 207 | var pipeline = EtlPipeline 208 | .StartWith(items) 209 | .WriteTo(writer); 210 | 211 | await pipeline.ExecuteAsync(); 212 | } 213 | ``` 214 | 215 | ## Execute the Pipeline 216 | 217 | After calling the `WriteTo(BulkWriter)` method, you'll have an instance of an `IEtlPipeline` object. Execute the pipeline by calling `ExecuteAsync`. Each step in your pipeline (including `StartWith` and `WriteTo`) will run in its own separate `Task`. The task returned by the call to `ExecuteAsync` will wait for all of the child tasks to complete before returning. 218 | 219 | ```csharp 220 | Task ExecuteAsync(); 221 | Task ExecuteAsync(CancellationToken cancellationToken); 222 | ``` 223 | 224 | **Example:** 225 | 226 | ```csharp 227 | using (var writer = new BulkWriter(connectionString)) 228 | { 229 | var items = Enumerable.Range(1, 1000).Select(i => new MyClass { Id = i, Name = "Bob", WeightInKg = 80 }); 230 | var pipeline = EtlPipeline 231 | .StartWith(items) 232 | .WriteTo(writer); 233 | 234 | await pipeline.ExecuteAsync(cancellationToken); 235 | } 236 | ``` 237 | 238 | ## Handle Errors 239 | 240 | Since each step in the pipeline runs under the parent task returned by `ExecuteAsync`, you can examine the parent `Task.Exception.InnerExceptions` property for all exceptions that may have been thrown when the pipeline was executed. Any records that streamed through to the `BulkWriter` before the exception halted the pipeline will be written to the database. 
241 | 242 | ```csharp 243 | using (var writer = new BulkWriter(connectionString)) 244 | { 245 | var items = Enumerable.Range(1, 1000).Select(i => new MyClass { Id = i, Name = "Bob", WeightInKg = 80 }); 246 | var pipeline = EtlPipeline 247 | .StartWith(items) 248 | .WriteTo(writer); 249 | 250 | var pipelineExecutionTask = pipeline.ExecuteAsync(cancellationToken); 251 | try 252 | { 253 | await pipelineExecutionTask; 254 | } 255 | 256 | catch (Exception e) 257 | { 258 | //e will contain the first exception thrown by a pipeline step 259 | 260 | //pipelineExecutionTask.Exception is of type AggregateException 261 | //Its InnerException property will have all exceptions that were thrown 262 | } 263 | } 264 | ``` 265 | -------------------------------------------------------------------------------- /src/BulkWriter/Properties/Resources.resx: -------------------------------------------------------------------------------- 1 | 2 | 3 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | text/microsoft-resx 110 | 111 | 112 | 2.0 113 | 114 | 115 | System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 116 | 117 | 118 | System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 119 | 120 | 121 | The provided ordinal cannot be less than 0. 122 | 123 | 124 | The provided connection string cannot be an empty string. 125 | 126 | 127 | The provided colum name mapping cannot be an empty string. 128 | 129 | 130 | The provided ordinal mapping cannot be less than 0. 131 | 132 | 133 | The provided column size mapping cannot be less than 0. 134 | 135 | 136 | The provided data type mapping cannot be an empty string. 
137 | 138 | 139 | The provided destination table cannot be an empty string. 140 | 141 | 142 | The expression must contain a MemberAccessExpression to a property (t => t.Property). 143 | 144 | 145 | The current mapping does not match a column in the destination table. The proposed source property is '{0}' and the proposed destination column is '{1}'. 146 | 147 | 148 | The ordinal does not map to a name. 149 | 150 | 151 | The ordinal does not map to a property. 152 | 153 | 154 | The name does not map to a ordinal. 155 | 156 | 157 | The property '{0}' is not set. Use the 'IsPropertySet' method to check if a property has been set. 158 | 159 | 160 | The supplied property is not declared on a type. 161 | 162 | 163 | The ordinal does not map to property. 164 | 165 | 166 | The ordinal does not map to property. 167 | 168 | -------------------------------------------------------------------------------- /src/BulkWriter/Properties/Resources.Designer.cs: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------ 2 | // 3 | // This code was generated by a tool. 4 | // Runtime Version:4.0.30319.42000 5 | // 6 | // Changes to this file may cause incorrect behavior and will be lost if 7 | // the code is regenerated. 8 | // 9 | //------------------------------------------------------------------------------ 10 | 11 | using System.Reflection; 12 | 13 | namespace BulkWriter.Properties { 14 | using System; 15 | 16 | 17 | /// 18 | /// A strongly-typed resource class, for looking up localized strings, etc. 19 | /// 20 | // This class was auto-generated by the StronglyTypedResourceBuilder 21 | // class via a tool like ResGen or Visual Studio. 22 | // To add or remove a member, edit your .ResX file then rerun ResGen 23 | // with the /str option, or rebuild your VS project. 
24 | [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "4.0.0.0")] 25 | [global::System.Diagnostics.DebuggerNonUserCodeAttribute()] 26 | [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()] 27 | internal class Resources { 28 | 29 | private static global::System.Resources.ResourceManager resourceMan; 30 | 31 | private static global::System.Globalization.CultureInfo resourceCulture; 32 | 33 | [global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")] 34 | internal Resources() { 35 | } 36 | 37 | /// 38 | /// Returns the cached ResourceManager instance used by this class. 39 | /// 40 | [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)] 41 | internal static global::System.Resources.ResourceManager ResourceManager { 42 | get { 43 | if (object.ReferenceEquals(resourceMan, null)) { 44 | global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("BulkWriter.Properties.Resources", typeof(Resources).GetTypeInfo().Assembly); 45 | resourceMan = temp; 46 | } 47 | return resourceMan; 48 | } 49 | } 50 | 51 | /// 52 | /// Overrides the current thread's CurrentUICulture property for all 53 | /// resource lookups using this strongly typed resource class. 54 | /// 55 | [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)] 56 | internal static global::System.Globalization.CultureInfo Culture { 57 | get { 58 | return resourceCulture; 59 | } 60 | set { 61 | resourceCulture = value; 62 | } 63 | } 64 | 65 | /// 66 | /// Looks up a localized string similar to The current mapping does not match a column in the destination table. The proposed source property is '{0}' and the proposed destination column is '{1}'.. 
67 | /// 68 | internal static string AutoDiscover_Mappings_MappingDoesNotMatchDbColumn { 69 | get { 70 | return ResourceManager.GetString("AutoDiscover_Mappings_MappingDoesNotMatchDbColumn", resourceCulture); 71 | } 72 | } 73 | 74 | /// 75 | /// Looks up a localized string similar to The ordinal does not map to a name.. 76 | /// 77 | internal static string EnumerableDataReader_GetName_OrdinalDoesNotMapToName { 78 | get { 79 | return ResourceManager.GetString("EnumerableDataReader_GetName_OrdinalDoesNotMapToName", resourceCulture); 80 | } 81 | } 82 | 83 | /// 84 | /// Looks up a localized string similar to The name does not map to a ordinal.. 85 | /// 86 | internal static string EnumerableDataReader_GetOrdinal_NameDoesNotMapToOrdinal { 87 | get { 88 | return ResourceManager.GetString("EnumerableDataReader_GetOrdinal_NameDoesNotMapToOrdinal", resourceCulture); 89 | } 90 | } 91 | 92 | /// 93 | /// Looks up a localized string similar to The ordinal does not map to a property.. 94 | /// 95 | internal static string EnumerableDataReader_GetValue_OrdinalDoesNotMapToProperty { 96 | get { 97 | return ResourceManager.GetString("EnumerableDataReader_GetValue_OrdinalDoesNotMapToProperty", resourceCulture); 98 | } 99 | } 100 | 101 | /// 102 | /// Looks up a localized string similar to The expression must contain a MemberAccessExpression to a property (t => t.Property).. 103 | /// 104 | internal static string MapBuilderContext_ExtractPropertyInfo_InvalidPropertySelector { 105 | get { 106 | return ResourceManager.GetString("MapBuilderContext_ExtractPropertyInfo_InvalidPropertySelector", resourceCulture); 107 | } 108 | } 109 | 110 | /// 111 | /// Looks up a localized string similar to The provided destination table cannot be an empty string.. 
112 | /// 113 | internal static string MapBuilderContext_ToDestinationTable_InvalidTableName { 114 | get { 115 | return ResourceManager.GetString("MapBuilderContext_ToDestinationTable_InvalidTableName", resourceCulture); 116 | } 117 | } 118 | 119 | /// 120 | /// Looks up a localized string similar to The provided colum name mapping cannot be an empty string.. 121 | /// 122 | internal static string MapBuilderContextMap_ToColumnName_InvalidColumName { 123 | get { 124 | return ResourceManager.GetString("MapBuilderContextMap_ToColumnName_InvalidColumName", resourceCulture); 125 | } 126 | } 127 | 128 | /// 129 | /// Looks up a localized string similar to The provided ordinal mapping cannot be less than 0.. 130 | /// 131 | internal static string MapBuilderContextMap_ToColumnOrdinal_InvalidOrdinal { 132 | get { 133 | return ResourceManager.GetString("MapBuilderContextMap_ToColumnOrdinal_InvalidOrdinal", resourceCulture); 134 | } 135 | } 136 | 137 | /// 138 | /// Looks up a localized string similar to The provided column size mapping cannot be less than 0.. 139 | /// 140 | internal static string MapBuilderContextMap_ToColumnSize_InvalidColumnSize { 141 | get { 142 | return ResourceManager.GetString("MapBuilderContextMap_ToColumnSize_InvalidColumnSize", resourceCulture); 143 | } 144 | } 145 | 146 | /// 147 | /// Looks up a localized string similar to The provided data type mapping cannot be an empty string.. 148 | /// 149 | internal static string MapBuilderContextMap_ToDataTypeName_InvalidName { 150 | get { 151 | return ResourceManager.GetString("MapBuilderContextMap_ToDataTypeName_InvalidName", resourceCulture); 152 | } 153 | } 154 | 155 | /// 156 | /// Looks up a localized string similar to The provided connection string cannot be an empty string.. 
157 | /// 158 | internal static string Mapping_CreateBulkWriter_InvalidConnectionString { 159 | get { 160 | return ResourceManager.GetString("Mapping_CreateBulkWriter_InvalidConnectionString", resourceCulture); 161 | } 162 | } 163 | 164 | /// 165 | /// Looks up a localized string similar to The property '{0}' is not set. Use the 'IsPropertySet' method to check if a property has been set.. 166 | /// 167 | internal static string MappingDestination_GetPropertyValue_PropertyNotSet { 168 | get { 169 | return ResourceManager.GetString("MappingDestination_GetPropertyValue_PropertyNotSet", resourceCulture); 170 | } 171 | } 172 | 173 | /// 174 | /// Looks up a localized string similar to The provided ordinal cannot be less than 0.. 175 | /// 176 | internal static string MappingSource_InvalidOrdinal { 177 | get { 178 | return ResourceManager.GetString("MappingSource_InvalidOrdinal", resourceCulture); 179 | } 180 | } 181 | 182 | /// 183 | /// Looks up a localized string similar to The supplied property is not declared on a type.. 184 | /// 185 | internal static string PropertyInfoExtensions_PropertyNotDeclaredOnType { 186 | get { 187 | return ResourceManager.GetString("PropertyInfoExtensions_PropertyNotDeclaredOnType", resourceCulture); 188 | } 189 | } 190 | 191 | /// 192 | /// Looks up a localized string similar to The supplied property is not declared on a type.. 193 | /// 194 | internal static string EnumerableDataReader_GetString_OrdinalDoesNotMapToProperty 195 | { 196 | get 197 | { 198 | return ResourceManager.GetString("EnumerableDataReader_GetString_OrdinalDoesNotMapToProperty", resourceCulture); 199 | } 200 | } 201 | 202 | /// 203 | /// Looks up a localized string similar to The supplied property is not declared on a type.. 
204 | /// 205 | internal static string EnumerableDataReader_GetBytes_OrdinalDoesNotMapToProperty 206 | { 207 | get 208 | { 209 | return ResourceManager.GetString("EnumerableDataReader_GetBytes_OrdinalDoesNotMapToProperty", resourceCulture); 210 | } 211 | } 212 | } 213 | } 214 | -------------------------------------------------------------------------------- /src/BulkWriter.Tests/BulkWriterTests.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.ComponentModel.DataAnnotations; 3 | using System.ComponentModel.DataAnnotations.Schema; 4 | using Microsoft.Data.SqlClient; 5 | using System.Linq; 6 | using System.Threading.Tasks; 7 | using Xunit; 8 | 9 | namespace BulkWriter.Tests 10 | { 11 | [Collection(nameof(DbContainerFixture))] 12 | public class BulkWriterTests 13 | { 14 | private readonly DbContainerFixture _fixture; 15 | 16 | public BulkWriterTests(DbContainerFixture fixture) => _fixture = fixture; 17 | 18 | public class BulkWriterTestsMyTestClass 19 | { 20 | public int Id { get; set; } 21 | 22 | public string Name { get; set; } 23 | } 24 | 25 | public class BulkWriterTestsMyTestClassWithKey 26 | { 27 | [Key] 28 | public int Id { get; set; } 29 | 30 | public string Name { get; set; } 31 | } 32 | 33 | [Fact] 34 | public async Task CanWriteSync() 35 | { 36 | string tableName = _fixture.DropCreate(nameof(BulkWriterTestsMyTestClass)); 37 | 38 | var writer = new BulkWriter(_fixture.TestConnectionString); 39 | 40 | var items = Enumerable.Range(1, 1000).Select(i => new BulkWriterTestsMyTestClass { Id = i, Name = "Bob" }); 41 | 42 | writer.WriteToDatabase(items); 43 | 44 | var count = (int)await _fixture.ExecuteScalar($"SELECT COUNT(1) FROM {tableName}"); 45 | 46 | Assert.Equal(1000, count); 47 | } 48 | 49 | [Fact] 50 | public async Task CanWriteSyncWithOptions() 51 | { 52 | var tableName = _fixture.DropCreate(nameof(BulkWriterTestsMyTestClass)); 53 | var tableNameWithKey = 
_fixture.DropCreate(nameof(BulkWriterTestsMyTestClassWithKey)); 54 | 55 | var writer = new BulkWriter(_fixture.TestConnectionString); 56 | var writerWithOptions = new BulkWriter(_fixture.TestConnectionString, SqlBulkCopyOptions.KeepIdentity); 57 | 58 | var items = Enumerable.Range(11, 20).Select(i => new BulkWriterTestsMyTestClass { Id = i, Name = "Bob" }); 59 | var itemsWithKey = Enumerable.Range(11, 20).Select(i => new BulkWriterTestsMyTestClassWithKey { Id = i, Name = "Bob" }); 60 | 61 | writer.WriteToDatabase(items); 62 | writerWithOptions.WriteToDatabase(itemsWithKey); 63 | 64 | var minId = (int)await _fixture.ExecuteScalar($"SELECT MIN(Id) FROM {tableName}"); 65 | var minIdWithKey = (int)await _fixture.ExecuteScalar($"SELECT MIN(Id) FROM {tableNameWithKey}"); 66 | 67 | Assert.Equal(1, minId); 68 | Assert.Equal(11, minIdWithKey); 69 | } 70 | 71 | [Fact] 72 | public async Task CanWriteSyncWithExistingConnection() 73 | { 74 | string tableName = _fixture.DropCreate(nameof(BulkWriterTestsMyTestClass)); 75 | 76 | using (var connection = new SqlConnection(_fixture.TestConnectionString)) 77 | { 78 | await connection.OpenAsync(); 79 | 80 | var writer = new BulkWriter(connection); 81 | 82 | var items = Enumerable.Range(1, 1000) 83 | .Select(i => new BulkWriterTestsMyTestClass { Id = i, Name = "Bob" }); 84 | 85 | writer.WriteToDatabase(items); 86 | 87 | var count = (int)await _fixture.ExecuteScalar(connection, $"SELECT COUNT(1) FROM {tableName}"); 88 | 89 | Assert.Equal(1000, count); 90 | } 91 | } 92 | 93 | [Fact] 94 | public async Task CanWriteSyncWithExistingConnectionAndTransaction() 95 | { 96 | string tableName = _fixture.DropCreate(nameof(BulkWriterTestsMyTestClass)); 97 | 98 | using (var connection = new SqlConnection(_fixture.TestConnectionString)) 99 | { 100 | await connection.OpenAsync(); 101 | 102 | using (var transaction = connection.BeginTransaction()) 103 | { 104 | 105 | var writer = new BulkWriter(connection, transaction); 106 | 107 | var items = 
Enumerable.Range(1, 1000) 108 | .Select(i => new BulkWriterTestsMyTestClass { Id = i, Name = "Bob" }); 109 | 110 | writer.WriteToDatabase(items); 111 | 112 | var count = (int)await _fixture.ExecuteScalar(connection, $"SELECT COUNT(1) FROM {tableName}", transaction); 113 | 114 | Assert.Equal(1000, count); 115 | 116 | transaction.Rollback(); 117 | 118 | count = (int)await _fixture.ExecuteScalar(connection, $"SELECT COUNT(1) FROM {tableName}"); 119 | 120 | Assert.Equal(0, count); 121 | } 122 | } 123 | } 124 | 125 | [Fact] 126 | public async Task CanWriteSyncWithExistingConnectionAndTransactionAndOptions() 127 | { 128 | var tableName = _fixture.DropCreate(nameof(BulkWriterTestsMyTestClass)); 129 | var tableNameWithKey = _fixture.DropCreate(nameof(BulkWriterTestsMyTestClassWithKey)); 130 | 131 | using (var connection = new SqlConnection(_fixture.TestConnectionString)) 132 | { 133 | await connection.OpenAsync(); 134 | 135 | using (var transaction = connection.BeginTransaction()) 136 | { 137 | var writer = new BulkWriter(connection, transaction); 138 | var writerWithOptions = new BulkWriter(connection, SqlBulkCopyOptions.KeepIdentity, transaction); 139 | 140 | var items = Enumerable.Range(11, 20).Select(i => new BulkWriterTestsMyTestClass { Id = i, Name = "Bob" }); 141 | var itemsWithKey = Enumerable.Range(11, 20).Select(i => new BulkWriterTestsMyTestClassWithKey { Id = i, Name = "Bob" }); 142 | 143 | writer.WriteToDatabase(items); 144 | writerWithOptions.WriteToDatabase(itemsWithKey); 145 | 146 | var minId = (int?)await _fixture.ExecuteScalar(connection, $"SELECT MIN(Id) FROM {tableName}", transaction); 147 | var minIdWithKey = (int?)await _fixture.ExecuteScalar(connection, $"SELECT MIN(Id) FROM {tableNameWithKey}", transaction); 148 | 149 | Assert.Equal(1, minId); 150 | Assert.Equal(11, minIdWithKey); 151 | 152 | transaction.Rollback(); 153 | 154 | var emptyMinId = await _fixture.ExecuteScalar(connection, $"SELECT MIN(Id) FROM {tableName}"); 155 | var emptyMinIdWithKey 
= await _fixture.ExecuteScalar(connection, $"SELECT MIN(Id) FROM {tableNameWithKey}"); 156 | 157 | Assert.Equal(emptyMinId, System.DBNull.Value); 158 | Assert.Equal(emptyMinIdWithKey, System.DBNull.Value); 159 | } 160 | } 161 | } 162 | 163 | public class OrdinalAndColumnNameExampleType 164 | { 165 | [NotMapped] 166 | public string Dummy { get; set; } 167 | 168 | [Column(Order = 0)] 169 | public int Id { get; set; } 170 | 171 | [NotMapped] 172 | public string Name { get; set; } 173 | 174 | [Column("Name")] 175 | public string Name2 { get; set; } 176 | } 177 | 178 | [Fact] 179 | public async Task Should_Handle_Both_Ordinal_And_ColumnName_For_Destination_Mapping() 180 | { 181 | string tableName = _fixture.DropCreate(nameof(OrdinalAndColumnNameExampleType)); 182 | 183 | var writer = new BulkWriter(_fixture.TestConnectionString); 184 | 185 | var items = new[] { new OrdinalAndColumnNameExampleType { Id = 1, Name2 = "Bob" } }; 186 | 187 | writer.WriteToDatabase(items); 188 | 189 | var count = (int)await _fixture.ExecuteScalar($"SELECT COUNT(1) FROM {tableName}"); 190 | 191 | Assert.Equal(1, count); 192 | } 193 | 194 | public class MyTestClassForNvarCharMax 195 | { 196 | public int Id { get; set; } 197 | public string Name { get; set; } 198 | } 199 | 200 | [Fact] 201 | public async Task Should_Handle_Column_Nvarchar_With_Length_Max() 202 | { 203 | string tableName = nameof(MyTestClassForNvarCharMax); 204 | _fixture.ExecuteNonQuery($"DROP TABLE IF EXISTS [dbo].[{tableName}]"); 205 | _fixture.ExecuteNonQuery( 206 | "CREATE TABLE [dbo].[" + tableName + "](" + 207 | "[Id] [int] IDENTITY(1,1) NOT NULL," + 208 | "[Name] [nvarchar](MAX) NULL," + 209 | "CONSTRAINT [PK_" + tableName + "] PRIMARY KEY CLUSTERED ([Id] ASC)" + 210 | ")"); 211 | 212 | var writer = new BulkWriter(_fixture.TestConnectionString); 213 | 214 | var items = new[] { new MyTestClassForNvarCharMax { Id = 1, Name = "Bob" } }; 215 | 216 | writer.WriteToDatabase(items); 217 | 218 | var count = (int)await 
_fixture.ExecuteScalar($"SELECT COUNT(1) FROM {tableName}"); 219 | 220 | Assert.Equal(1, count); 221 | } 222 | 223 | public class MyTestClassForVarBinary 224 | { 225 | public int Id { get; set; } 226 | public byte[] Data { get; set; } 227 | } 228 | 229 | [Fact] 230 | public async Task Should_Handle_Column_VarBinary_Large() 231 | { 232 | string tableName = nameof(MyTestClassForVarBinary); 233 | 234 | _fixture.ExecuteNonQuery($"DROP TABLE IF EXISTS [dbo].[{tableName}]"); 235 | _fixture.ExecuteNonQuery( 236 | "CREATE TABLE [dbo].[" + tableName + "](" + 237 | "[Id] [int] IDENTITY(1,1) NOT NULL," + 238 | "[Data] [varbinary](MAX) NULL," + 239 | "CONSTRAINT [PK_" + tableName + "] PRIMARY KEY CLUSTERED ([Id] ASC)" + 240 | ")"); 241 | 242 | var writer = new BulkWriter(_fixture.TestConnectionString); 243 | var items = new[] { new MyTestClassForVarBinary { Id = 1, Data = new byte[1024 * 1024 * 1] } }; 244 | new Random().NextBytes(items.First().Data); 245 | 246 | writer.WriteToDatabase(items); 247 | 248 | var count = (int)await _fixture.ExecuteScalar($"SELECT COUNT(1) FROM {tableName}"); 249 | var data = (byte[])await _fixture.ExecuteScalar($"SELECT TOP 1 Data FROM {tableName}"); 250 | Assert.Equal(items.First().Data, data); 251 | Assert.Equal(1, count); 252 | } 253 | } 254 | } 255 | -------------------------------------------------------------------------------- /src/BulkWriter.Tests/EnumerableDataReaderTests.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.ComponentModel.DataAnnotations.Schema; 4 | using System.Linq; 5 | using System.Text; 6 | using BulkWriter.Internal; 7 | using Xunit; 8 | 9 | namespace BulkWriter.Tests 10 | { 11 | [Collection(nameof(DbContainerFixture))] 12 | public class EnumerableDataReaderTests : IDisposable 13 | { 14 | private readonly string _connectionString; 15 | 16 | private readonly string _tableName = nameof(MyTestClass); 17 | private 
readonly string _customOrderTableName = nameof(MyCustomOrderTestClass); 18 | 19 | private readonly IEnumerable _enumerable; 20 | private readonly EnumerableDataReader _dataReader; 21 | 22 | private readonly IEnumerable _customOrderEnumerable; 23 | private readonly EnumerableDataReader _customOrderDataReader; 24 | 25 | private readonly DbContainerFixture _fixture; 26 | 27 | public EnumerableDataReaderTests(DbContainerFixture fixture) 28 | { 29 | _fixture = fixture; 30 | _connectionString = fixture.TestConnectionString; 31 | 32 | // 33 | // Setup for testing default mapping using the source entity's property positions as the ordinals. 34 | // 35 | 36 | _enumerable = new[] { new MyTestClass() }; 37 | 38 | _fixture.ExecuteNonQuery(_connectionString, $"DROP TABLE IF EXISTS [dbo].[{_tableName}]"); 39 | 40 | _fixture.ExecuteNonQuery(_connectionString, 41 | "CREATE TABLE [dbo].[" + _tableName + "](" + 42 | "[Id] [int] IDENTITY(1,1) NOT NULL," + 43 | "[Name] [nvarchar](50) NULL," + 44 | "[Data] [varbinary](max) NULL," + 45 | "CONSTRAINT [PK_" + _tableName + "] PRIMARY KEY CLUSTERED ([Id] ASC)" + 46 | ")"); 47 | 48 | var propertyMappings = typeof(MyTestClass).BuildMappings(); 49 | 50 | _dataReader = new EnumerableDataReader(_enumerable, propertyMappings); 51 | _dataReader.Read(); 52 | 53 | 54 | // 55 | // Setup for testing custom mapping using [Column(Order = ...)] to specify ordinals on the source 56 | // entity. This is useful when the layout of the properties on the source entity doesn't match 57 | // the column ordinals in the database table (e.g., tables generated by EF Core <= 2.0, which 58 | // seems to create the columns by ordering the property names alphabetically). 
59 | // 60 | 61 | _customOrderEnumerable = new[] { new MyCustomOrderTestClass() }; 62 | 63 | _fixture.ExecuteNonQuery(_connectionString, $"DROP TABLE IF EXISTS [dbo].[{_customOrderTableName}]"); 64 | 65 | _fixture.ExecuteNonQuery(_connectionString, 66 | "CREATE TABLE [dbo].[" + _customOrderTableName + "](" + 67 | "[Id] [int] IDENTITY(1,1) NOT NULL," + 68 | "[FirstName] [nvarchar](50) NULL," + 69 | "[MiddleName] [nvarchar](50) NULL," + 70 | "[LastName] [nvarchar](50) NULL," + 71 | "CONSTRAINT [PK_" + _customOrderTableName + "] PRIMARY KEY CLUSTERED ([Id] ASC)" + 72 | ")"); 73 | 74 | var customOrderPropertyMappings = typeof(MyCustomOrderTestClass).BuildMappings(); 75 | 76 | _customOrderDataReader = new EnumerableDataReader(_customOrderEnumerable, customOrderPropertyMappings); 77 | _customOrderDataReader.Read(); 78 | } 79 | 80 | public void Dispose() 81 | { 82 | _fixture.ExecuteNonQuery(_connectionString, "DROP TABLE " + _tableName); 83 | _fixture.ExecuteNonQuery(_connectionString, "DROP TABLE " + _customOrderTableName); 84 | } 85 | 86 | [Fact] 87 | public void Read_Advances_Enumerable() 88 | { 89 | Assert.Same(_enumerable.ElementAt(0), _dataReader.Current); 90 | } 91 | 92 | [Fact] 93 | public void GetOrdinal_Returns_Correct_Value() 94 | { 95 | // Use source entity property names to lookup the destination ordinals. 96 | // Here, the destination ordinal is the default positional ordinal from each property on 97 | // the source entity. There is no custom mapping from ColumnAttributes. 98 | Assert.Equal(0, _dataReader.GetOrdinal(nameof(MyTestClass.Id))); 99 | Assert.Equal(1, _dataReader.GetOrdinal(nameof(MyTestClass.Name))); 100 | Assert.Equal(2, _dataReader.GetOrdinal(nameof(MyTestClass.Data))); 101 | } 102 | 103 | [Fact] 104 | public void GetOrdinal_Returns_Correct_Value_With_Custom_Order() 105 | { 106 | // Use source entity property names to lookup the destination ordinals. 
107 | // Here, GetOrdinal returns the destination (i.e., database column) ordinal as defined in 108 | // the ColumnAttributes declared on the source entity's properties. 109 | Assert.Equal(0, _customOrderDataReader.GetOrdinal(nameof(MyCustomOrderTestClass.Id))); 110 | Assert.Equal(4, _customOrderDataReader.GetOrdinal(nameof(MyCustomOrderTestClass.FirstName))); 111 | Assert.Equal(1, _customOrderDataReader.GetOrdinal(nameof(MyCustomOrderTestClass.MiddleName))); 112 | Assert.Equal(2, _customOrderDataReader.GetOrdinal(nameof(MyCustomOrderTestClass.LastName))); 113 | Assert.Equal(3, _customOrderDataReader.GetOrdinal(nameof(MyCustomOrderTestClass.Data))); 114 | } 115 | 116 | [Fact] 117 | public void GetOrdinal_Throws_For_Invalid_Name() 118 | { 119 | Assert.Throws(() => 120 | { 121 | Assert.Equal(1, _dataReader.GetOrdinal("ABC123XYZ_foobar_bad_name")); 122 | }); 123 | } 124 | 125 | [Fact] 126 | public void GetOrdinal_Throws_For_Invalid_Name_With_Custom_Order() 127 | { 128 | Assert.Throws(() => 129 | { 130 | Assert.Equal(1, _customOrderDataReader.GetOrdinal("ABC123XYZ_foobar_bad_name")); 131 | }); 132 | } 133 | 134 | [Fact] 135 | public void IsDbNull_Returns_Correct_Value() 136 | { 137 | Assert.True(_dataReader.IsDBNull(1)); 138 | } 139 | 140 | [Fact] 141 | public void IsDbNull_Returns_Correct_Value_With_Custom_Order() 142 | { 143 | Assert.True(_customOrderDataReader.IsDBNull(1)); 144 | } 145 | 146 | [Fact] 147 | public void GetValue_Returns_Correct_Value() 148 | { 149 | var element = _enumerable.ElementAt(0); 150 | element.Id = 418; 151 | element.Name = "Michael"; 152 | 153 | Assert.Equal(418, _dataReader.GetValue(0)); 154 | Assert.Equal("Michael", _dataReader.GetValue(1)); 155 | } 156 | 157 | [Fact] 158 | public void GetValue_Returns_Correct_Value_With_Custom_Order() 159 | { 160 | var element = _customOrderEnumerable.ElementAt(0); 161 | element.Id = 11; 162 | element.FirstName = "Edgar"; 163 | element.MiddleName = "Allan"; 164 | element.LastName = "Poe"; 165 | 166 | // 
We look up the values from the source entity using the source ordinals (i.e., the position 167 | // of the properties as declared on the entity, NOT via ColumnAttribute). 168 | Assert.Equal(11, _customOrderDataReader.GetValue(0)); 169 | Assert.Equal("Edgar", _customOrderDataReader.GetValue(1)); 170 | Assert.Equal("Allan", _customOrderDataReader.GetValue(2)); 171 | Assert.Equal("Poe", _customOrderDataReader.GetValue(3)); 172 | Assert.Null(_customOrderDataReader.GetValue(4)); 173 | } 174 | 175 | [Fact] 176 | public void GetBytes_Returns_Correct_Value() 177 | { 178 | var inputBytes = Encoding.UTF8.GetBytes("Michael"); 179 | 180 | var element = _enumerable.ElementAt(0); 181 | element.Id = 419; 182 | element.Data = inputBytes; 183 | 184 | var buffer = new byte[128]; 185 | const int fieldOffset = 0; 186 | const int bufferOffset = 10; 187 | var bytesRead = _dataReader.GetBytes(2, fieldOffset, buffer, bufferOffset, buffer.Length - bufferOffset); 188 | 189 | Assert.Equal(bytesRead, inputBytes.Length); 190 | Assert.Equal("Michael", Encoding.UTF8.GetString(buffer, bufferOffset, (int)bytesRead)); 191 | } 192 | 193 | [Fact] 194 | public void GetBytes_Returns_Correct_Value_Equal_Buffer() 195 | { 196 | var element = _enumerable.ElementAt(0); 197 | element.Data = new byte[16]; 198 | new Random().NextBytes(element.Data); 199 | 200 | var buffer = new byte[16]; 201 | var count = _dataReader.GetBytes(2, 0, buffer, 0, 0); 202 | 203 | Assert.Equal(16, count); 204 | Assert.Equal(element.Data, buffer); 205 | } 206 | 207 | [Fact] 208 | public void GetBytes_Returns_Correct_Value_Less_Than_Buffer() 209 | { 210 | var element = _enumerable.ElementAt(0); 211 | element.Data = new byte[16]; 212 | new Random().NextBytes(element.Data); 213 | 214 | var buffer = new byte[32]; 215 | var count = _dataReader.GetBytes(2, 0, buffer, 0, 0); 216 | 217 | Assert.Equal(16, count); 218 | Assert.Equal(element.Data, buffer.Take(16)); 219 | Assert.True(buffer.Skip(16).Take(16).All(b => b == 0)); 220 | } 221 | 222 
| [Fact] 223 | public void GetBytes_Returns_Correct_Value_Greater_Than_Buffer_Partial_Page() 224 | { 225 | var element = _enumerable.ElementAt(0); 226 | element.Data = new byte[24]; 227 | new Random().NextBytes(element.Data); 228 | 229 | var buffer = new byte[16]; 230 | var result = new byte[24]; 231 | var count = ByteReadHelper(2, buffer, result); 232 | 233 | Assert.Equal(24, count); 234 | Assert.Equal(element.Data, result); 235 | } 236 | 237 | [Fact] 238 | public void GetBytes_Returns_Correct_Value_Greater_Than_Buffer_Multiple_Full_Pages() 239 | { 240 | var element = _enumerable.ElementAt(0); 241 | element.Data = new byte[16 * 3]; 242 | new Random().NextBytes(element.Data); 243 | 244 | var buffer = new byte[16]; 245 | var result = new byte[16 * 3]; 246 | var count = ByteReadHelper(2, buffer, result); 247 | 248 | Assert.Equal(16 * 3, count); 249 | Assert.Equal(element.Data, result); 250 | } 251 | 252 | [Fact] 253 | public void GetBytes_Returns_Correct_Value_Empty() 254 | { 255 | var element = _enumerable.ElementAt(0); 256 | element.Data = new byte[0]; 257 | new Random().NextBytes(element.Data); 258 | 259 | var buffer = new byte[16]; 260 | var result = new byte[0]; 261 | var count = ByteReadHelper(2, buffer, result); 262 | 263 | Assert.Equal(0, count); 264 | Assert.Equal(element.Data, result); 265 | } 266 | 267 | [Fact] 268 | public void GetBytes_Returns_Correct_Value_Default_BulkCopy_Buffer() 269 | { 270 | var element = _enumerable.ElementAt(0); 271 | element.Data = Guid.NewGuid().ToByteArray(); 272 | 273 | var buffer = new byte[4096]; 274 | var result = new byte[element.Data.Length]; 275 | var count = ByteReadHelper(2, buffer, result); 276 | 277 | Assert.Equal(element.Data.Length, count); 278 | Assert.Equal(element.Data, result); 279 | } 280 | 281 | private long ByteReadHelper(int ordinal, byte[] buffer, byte[] result) 282 | { 283 | long count; 284 | long offset = 0; 285 | do 286 | { 287 | count = _dataReader.GetBytes(ordinal, offset, buffer, 0, 0); 288 | 
Buffer.BlockCopy(buffer, 0, result, (int)offset, (int)count); 289 | offset += count; 290 | } while (count == buffer.Length); 291 | return offset; 292 | } 293 | 294 | [Fact] 295 | public void GetBytes_Returns_Correct_Value_With_Custom_Order() 296 | { 297 | var inputBytes = Encoding.UTF8.GetBytes("Edgar"); 298 | 299 | var element = _customOrderEnumerable.ElementAt(0); 300 | element.Id = 12; 301 | element.Data = inputBytes; 302 | 303 | var buffer = new byte[128]; 304 | const int fieldOffset = 0; 305 | const int bufferOffset = 10; 306 | 307 | // When reading values from the source object, we use the property's positional ordinal. 308 | var bytesRead = _customOrderDataReader.GetBytes(4, fieldOffset, buffer, bufferOffset, buffer.Length - bufferOffset); 309 | 310 | Assert.Equal(bytesRead, inputBytes.Length); 311 | Assert.Equal("Edgar", Encoding.UTF8.GetString(buffer, bufferOffset, (int)bytesRead)); 312 | } 313 | 314 | [Fact] 315 | public void GetName_Returns_Correct_Value() 316 | { 317 | Assert.Equal(nameof(MyTestClass.Id), _dataReader.GetName(0)); 318 | Assert.Equal(nameof(MyTestClass.Name), _dataReader.GetName(1)); 319 | } 320 | 321 | [Fact] 322 | public void GetName_Returns_Correct_Value_With_Custom_Order() 323 | { 324 | // GetName returns the source property's name. We use the source property's positional 325 | // ordinal to look that up. 
326 | Assert.Equal(nameof(MyCustomOrderTestClass.Id), _customOrderDataReader.GetName(0)); 327 | Assert.Equal(nameof(MyCustomOrderTestClass.FirstName), _customOrderDataReader.GetName(1)); 328 | Assert.Equal(nameof(MyCustomOrderTestClass.MiddleName), _customOrderDataReader.GetName(2)); 329 | Assert.Equal(nameof(MyCustomOrderTestClass.LastName), _customOrderDataReader.GetName(3)); 330 | Assert.Equal(nameof(MyCustomOrderTestClass.Data), _customOrderDataReader.GetName(4)); 331 | } 332 | 333 | [Fact] 334 | public void FieldCount_Returns_Correct_Value() 335 | { 336 | Assert.Equal(3, _dataReader.FieldCount); 337 | } 338 | 339 | [Fact] 340 | public void FieldCount_Returns_Correct_Value_With_Custom_Order() 341 | { 342 | Assert.Equal(5, _customOrderDataReader.FieldCount); 343 | } 344 | 345 | public class MyTestClass 346 | { 347 | public int Id { get; set; } 348 | 349 | public string Name { get; set; } 350 | 351 | public byte[] Data { get; set; } 352 | } 353 | 354 | /// 355 | /// For testing property mapping where property layout doesn't match the database table's ordinals. 
356 | /// 357 | public class MyCustomOrderTestClass 358 | { 359 | [Column(Order = 0)] 360 | public int Id { get; set; } 361 | 362 | [Column(Order = 4)] 363 | public string FirstName { get; set; } 364 | 365 | [Column(Order = 1)] 366 | public string MiddleName { get; set; } 367 | 368 | [Column(Order = 2)] 369 | public string LastName { get; set; } 370 | 371 | [Column(Order = 3)] 372 | public byte[] Data { get; set; } 373 | } 374 | } 375 | } 376 | -------------------------------------------------------------------------------- /src/BulkWriter.Tests/AsyncEnumerableDataReaderTests.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.ComponentModel.DataAnnotations.Schema; 4 | using System.Linq; 5 | using System.Text; 6 | using BulkWriter.Internal; 7 | using System.Threading.Tasks; 8 | using Xunit; 9 | 10 | namespace BulkWriter.Tests 11 | { 12 | [Collection(nameof(DbContainerFixture))] 13 | public class AsyncEnumerableDataReaderTests : IAsyncLifetime 14 | { 15 | private readonly string _connectionString; 16 | 17 | private readonly string _tableName = nameof(MyTestClass); 18 | private readonly string _customOrderTableName = nameof(MyCustomOrderTestClass); 19 | 20 | private readonly IAsyncEnumerable _enumerable; 21 | private readonly AsyncEnumerableDataReader _dataReader; 22 | 23 | private readonly IAsyncEnumerable _customOrderEnumerable; 24 | private readonly AsyncEnumerableDataReader _customOrderDataReader; 25 | 26 | private readonly DbContainerFixture _fixture; 27 | 28 | public AsyncEnumerableDataReaderTests(DbContainerFixture fixture) 29 | { 30 | _fixture = fixture; 31 | _connectionString = fixture.TestConnectionString; 32 | 33 | // 34 | // Setup for testing default mapping using the source entity's property positions as the ordinals. 
35 | // 36 | 37 | _enumerable = new[] { new MyTestClass() }.ToAsyncEnumerable(); 38 | 39 | _fixture.ExecuteNonQuery(_connectionString, $"DROP TABLE IF EXISTS [dbo].[{_tableName}]"); 40 | 41 | _fixture.ExecuteNonQuery(_connectionString, 42 | "CREATE TABLE [dbo].[" + _tableName + "](" + 43 | "[Id] [int] IDENTITY(1,1) NOT NULL," + 44 | "[Name] [nvarchar](50) NULL," + 45 | "[Data] [varbinary](max) NULL," + 46 | "CONSTRAINT [PK_" + _tableName + "] PRIMARY KEY CLUSTERED ([Id] ASC)" + 47 | ")"); 48 | 49 | var propertyMappings = typeof(MyTestClass).BuildMappings(); 50 | 51 | _dataReader = new AsyncEnumerableDataReader(_enumerable, propertyMappings); 52 | 53 | 54 | // 55 | // Setup for testing custom mapping using [Column(Order = ...)] to specify ordinals on the source 56 | // entity. This is useful when the layout of the properties on the source entity doesn't match 57 | // the column ordinals in the database table (e.g., tables generated by EF Core <= 2.0, which 58 | // seems to create the columns by ordering the property names alphabetically). 
59 | // 60 | 61 | _customOrderEnumerable = new[] { new MyCustomOrderTestClass() }.ToAsyncEnumerable(); 62 | 63 | _fixture.ExecuteNonQuery(_connectionString, $"DROP TABLE IF EXISTS [dbo].[{_customOrderTableName}]"); 64 | 65 | _fixture.ExecuteNonQuery(_connectionString, 66 | "CREATE TABLE [dbo].[" + _customOrderTableName + "](" + 67 | "[Id] [int] IDENTITY(1,1) NOT NULL," + 68 | "[FirstName] [nvarchar](50) NULL," + 69 | "[MiddleName] [nvarchar](50) NULL," + 70 | "[LastName] [nvarchar](50) NULL," + 71 | "CONSTRAINT [PK_" + _customOrderTableName + "] PRIMARY KEY CLUSTERED ([Id] ASC)" + 72 | ")"); 73 | 74 | var customOrderPropertyMappings = typeof(MyCustomOrderTestClass).BuildMappings(); 75 | 76 | _customOrderDataReader = new AsyncEnumerableDataReader(_customOrderEnumerable, customOrderPropertyMappings); 77 | } 78 | 79 | [Fact] 80 | public async Task Read_Advances_Enumerable() 81 | { 82 | Assert.Same(await _enumerable.ElementAtAsync(0), _dataReader.Current); 83 | } 84 | 85 | [Fact] 86 | public void GetOrdinal_Returns_Correct_Value() 87 | { 88 | // Use source entity property names to lookup the destination ordinals. 89 | // Here, the destination ordinal is the default positional ordinal from each property on 90 | // the source entity. There is no custom mapping from ColumnAttributes. 91 | Assert.Equal(0, _dataReader.GetOrdinal(nameof(MyTestClass.Id))); 92 | Assert.Equal(1, _dataReader.GetOrdinal(nameof(MyTestClass.Name))); 93 | Assert.Equal(2, _dataReader.GetOrdinal(nameof(MyTestClass.Data))); 94 | } 95 | 96 | [Fact] 97 | public void GetOrdinal_Returns_Correct_Value_With_Custom_Order() 98 | { 99 | // Use source entity property names to lookup the destination ordinals. 100 | // Here, GetOrdinal returns the destination (i.e., database column) ordinal as defined in 101 | // the ColumnAttributes declared on the source entity's properties. 
102 | Assert.Equal(0, _customOrderDataReader.GetOrdinal(nameof(MyCustomOrderTestClass.Id))); 103 | Assert.Equal(4, _customOrderDataReader.GetOrdinal(nameof(MyCustomOrderTestClass.FirstName))); 104 | Assert.Equal(1, _customOrderDataReader.GetOrdinal(nameof(MyCustomOrderTestClass.MiddleName))); 105 | Assert.Equal(2, _customOrderDataReader.GetOrdinal(nameof(MyCustomOrderTestClass.LastName))); 106 | Assert.Equal(3, _customOrderDataReader.GetOrdinal(nameof(MyCustomOrderTestClass.Data))); 107 | } 108 | 109 | [Fact] 110 | public void GetOrdinal_Throws_For_Invalid_Name() 111 | { 112 | Assert.Throws(() => 113 | { 114 | Assert.Equal(1, _dataReader.GetOrdinal("ABC123XYZ_foobar_bad_name")); 115 | }); 116 | } 117 | 118 | [Fact] 119 | public void GetOrdinal_Throws_For_Invalid_Name_With_Custom_Order() 120 | { 121 | Assert.Throws(() => 122 | { 123 | Assert.Equal(1, _customOrderDataReader.GetOrdinal("ABC123XYZ_foobar_bad_name")); 124 | }); 125 | } 126 | 127 | [Fact] 128 | public void IsDbNull_Returns_Correct_Value() 129 | { 130 | Assert.True(_dataReader.IsDBNull(1)); 131 | } 132 | 133 | [Fact] 134 | public void IsDbNull_Returns_Correct_Value_With_Custom_Order() 135 | { 136 | Assert.True(_customOrderDataReader.IsDBNull(1)); 137 | } 138 | 139 | [Fact] 140 | public async Task GetValue_Returns_Correct_Value() 141 | { 142 | var element = await _enumerable.ElementAtAsync(0); 143 | element.Id = 418; 144 | element.Name = "Michael"; 145 | 146 | Assert.Equal(418, _dataReader.GetValue(0)); 147 | Assert.Equal("Michael", _dataReader.GetValue(1)); 148 | } 149 | 150 | [Fact] 151 | public async Task GetValue_Returns_Correct_Value_With_Custom_Order() 152 | { 153 | var element = await _customOrderEnumerable.ElementAtAsync(0); 154 | element.Id = 11; 155 | element.FirstName = "Edgar"; 156 | element.MiddleName = "Allan"; 157 | element.LastName = "Poe"; 158 | 159 | // We look up the values from the source entity using the source ordinals (i.e., the position 160 | // of the properties as declared on the 
entity, NOT via ColumnAttribute). 161 | Assert.Equal(11, _customOrderDataReader.GetValue(0)); 162 | Assert.Equal("Edgar", _customOrderDataReader.GetValue(1)); 163 | Assert.Equal("Allan", _customOrderDataReader.GetValue(2)); 164 | Assert.Equal("Poe", _customOrderDataReader.GetValue(3)); 165 | Assert.Null(_customOrderDataReader.GetValue(4)); 166 | } 167 | 168 | [Fact] 169 | public async Task GetBytes_Returns_Correct_Value() 170 | { 171 | var inputBytes = Encoding.UTF8.GetBytes("Michael"); 172 | 173 | var element = await _enumerable.ElementAtAsync(0); 174 | element.Id = 419; 175 | element.Data = inputBytes; 176 | 177 | var buffer = new byte[128]; 178 | const int fieldOffset = 0; 179 | const int bufferOffset = 10; 180 | var bytesRead = _dataReader.GetBytes(2, fieldOffset, buffer, bufferOffset, buffer.Length - bufferOffset); 181 | 182 | Assert.Equal(bytesRead, inputBytes.Length); 183 | Assert.Equal("Michael", Encoding.UTF8.GetString(buffer, bufferOffset, (int)bytesRead)); 184 | } 185 | 186 | [Fact] 187 | public async Task GetBytes_Returns_Correct_Value_Equal_Buffer() 188 | { 189 | var element = await _enumerable.ElementAtAsync(0); 190 | element.Data = new byte[16]; 191 | new Random().NextBytes(element.Data); 192 | 193 | var buffer = new byte[16]; 194 | var count = _dataReader.GetBytes(2, 0, buffer, 0, 0); 195 | 196 | Assert.Equal(16, count); 197 | Assert.Equal(element.Data, buffer); 198 | } 199 | 200 | [Fact] 201 | public async Task GetBytes_Returns_Correct_Value_Less_Than_Buffer() 202 | { 203 | var element = await _enumerable.ElementAtAsync(0); 204 | element.Data = new byte[16]; 205 | new Random().NextBytes(element.Data); 206 | 207 | var buffer = new byte[32]; 208 | var count = _dataReader.GetBytes(2, 0, buffer, 0, 0); 209 | 210 | Assert.Equal(16, count); 211 | Assert.Equal(element.Data, buffer.Take(16)); 212 | Assert.True(buffer.Skip(16).Take(16).All(b => b == 0)); 213 | } 214 | 215 | [Fact] 216 | public async Task 
GetBytes_Returns_Correct_Value_Greater_Than_Buffer_Partial_Page() 217 | { 218 | var element = await _enumerable.ElementAtAsync(0); 219 | element.Data = new byte[24]; 220 | new Random().NextBytes(element.Data); 221 | 222 | var buffer = new byte[16]; 223 | var result = new byte[24]; 224 | var count = ByteReadHelper(2, buffer, result); 225 | 226 | Assert.Equal(24, count); 227 | Assert.Equal(element.Data, result); 228 | } 229 | 230 | [Fact] 231 | public async Task GetBytes_Returns_Correct_Value_Greater_Than_Buffer_Multiple_Full_Pages() 232 | { 233 | var element = await _enumerable.ElementAtAsync(0); 234 | element.Data = new byte[16 * 3]; 235 | new Random().NextBytes(element.Data); 236 | 237 | var buffer = new byte[16]; 238 | var result = new byte[16 * 3]; 239 | var count = ByteReadHelper(2, buffer, result); 240 | 241 | Assert.Equal(16 * 3, count); 242 | Assert.Equal(element.Data, result); 243 | } 244 | 245 | [Fact] 246 | public async Task GetBytes_Returns_Correct_Value_Empty() 247 | { 248 | var element = await _enumerable.ElementAtAsync(0); 249 | element.Data = new byte[0]; 250 | new Random().NextBytes(element.Data); 251 | 252 | var buffer = new byte[16]; 253 | var result = new byte[0]; 254 | var count = ByteReadHelper(2, buffer, result); 255 | 256 | Assert.Equal(0, count); 257 | Assert.Equal(element.Data, result); 258 | } 259 | 260 | [Fact] 261 | public async Task GetBytes_Returns_Correct_Value_Default_BulkCopy_Buffer() 262 | { 263 | var element = await _enumerable.ElementAtAsync(0); 264 | element.Data = Guid.NewGuid().ToByteArray(); 265 | 266 | var buffer = new byte[4096]; 267 | var result = new byte[element.Data.Length]; 268 | var count = ByteReadHelper(2, buffer, result); 269 | 270 | Assert.Equal(element.Data.Length, count); 271 | Assert.Equal(element.Data, result); 272 | } 273 | 274 | private long ByteReadHelper(int ordinal, byte[] buffer, byte[] result) 275 | { 276 | long count; 277 | long offset = 0; 278 | do 279 | { 280 | count = _dataReader.GetBytes(ordinal, 
offset, buffer, 0, 0); 281 | Buffer.BlockCopy(buffer, 0, result, (int)offset, (int)count); 282 | offset += count; 283 | } while (count == buffer.Length); 284 | return offset; 285 | } 286 | 287 | [Fact] 288 | public async Task GetBytes_Returns_Correct_Value_With_Custom_Order() 289 | { 290 | var inputBytes = Encoding.UTF8.GetBytes("Edgar"); 291 | 292 | var element = await _customOrderEnumerable.ElementAtAsync(0); 293 | element.Id = 12; 294 | element.Data = inputBytes; 295 | 296 | var buffer = new byte[128]; 297 | const int fieldOffset = 0; 298 | const int bufferOffset = 10; 299 | 300 | // When reading values from the source object, we use the property's positional ordinal. 301 | var bytesRead = _customOrderDataReader.GetBytes(4, fieldOffset, buffer, bufferOffset, buffer.Length - bufferOffset); 302 | 303 | Assert.Equal(bytesRead, inputBytes.Length); 304 | Assert.Equal("Edgar", Encoding.UTF8.GetString(buffer, bufferOffset, (int)bytesRead)); 305 | } 306 | 307 | [Fact] 308 | public void GetName_Returns_Correct_Value() 309 | { 310 | Assert.Equal(nameof(MyTestClass.Id), _dataReader.GetName(0)); 311 | Assert.Equal(nameof(MyTestClass.Name), _dataReader.GetName(1)); 312 | } 313 | 314 | [Fact] 315 | public void GetName_Returns_Correct_Value_With_Custom_Order() 316 | { 317 | // GetName returns the source property's name. We use the source property's positional 318 | // ordinal to look that up. 
319 | Assert.Equal(nameof(MyCustomOrderTestClass.Id), _customOrderDataReader.GetName(0)); 320 | Assert.Equal(nameof(MyCustomOrderTestClass.FirstName), _customOrderDataReader.GetName(1)); 321 | Assert.Equal(nameof(MyCustomOrderTestClass.MiddleName), _customOrderDataReader.GetName(2)); 322 | Assert.Equal(nameof(MyCustomOrderTestClass.LastName), _customOrderDataReader.GetName(3)); 323 | Assert.Equal(nameof(MyCustomOrderTestClass.Data), _customOrderDataReader.GetName(4)); 324 | } 325 | 326 | [Fact] 327 | public void FieldCount_Returns_Correct_Value() 328 | { 329 | Assert.Equal(3, _dataReader.FieldCount); 330 | } 331 | 332 | [Fact] 333 | public void FieldCount_Returns_Correct_Value_With_Custom_Order() 334 | { 335 | Assert.Equal(5, _customOrderDataReader.FieldCount); 336 | } 337 | 338 | public class MyTestClass 339 | { 340 | public int Id { get; set; } 341 | 342 | public string Name { get; set; } 343 | 344 | public byte[] Data { get; set; } 345 | } 346 | 347 | /// 348 | /// For testing property mapping where property layout doesn't match the database table's ordinals. 
349 | /// 350 | public class MyCustomOrderTestClass 351 | { 352 | [Column(Order = 0)] 353 | public int Id { get; set; } 354 | 355 | [Column(Order = 4)] 356 | public string FirstName { get; set; } 357 | 358 | [Column(Order = 1)] 359 | public string MiddleName { get; set; } 360 | 361 | [Column(Order = 2)] 362 | public string LastName { get; set; } 363 | 364 | [Column(Order = 3)] 365 | public byte[] Data { get; set; } 366 | } 367 | 368 | public async Task InitializeAsync() 369 | { 370 | await _dataReader.ReadAsync(); 371 | await _customOrderDataReader.ReadAsync(); 372 | } 373 | 374 | public Task DisposeAsync() 375 | { 376 | _fixture.ExecuteNonQuery(_connectionString, "DROP TABLE " + _tableName); 377 | _fixture.ExecuteNonQuery(_connectionString, "DROP TABLE " + _customOrderTableName); 378 | 379 | return Task.CompletedTask; 380 | } 381 | } 382 | } 383 | --------------------------------------------------------------------------------