├── scripts ├── sql │ ├── dump │ │ └── .gitkeep │ ├── inputcsv │ │ └── .gitkeep │ ├── .gitignore │ ├── TrimTables.sql │ ├── ImportDataSales.sql │ ├── ImportDataOrders.sql │ ├── ImportDataCommon.sql │ ├── SQLBI_CreateSqlDatabases.ps1 │ ├── Sql_ImportData.cmd │ ├── SQLBI_ALL_DB.cmd │ ├── CreateDB.sql │ ├── CreateTablesSales.sql │ ├── CreateTablesOrders.sql │ └── CreateTablesCommon.sql ├── build_data │ ├── .gitignore │ ├── make_tool.cmd │ ├── bin │ │ └── 7za.exe │ ├── data.xlsx │ ├── build_all.cmd │ ├── config.json │ └── build_single.cmd └── publish_tool │ ├── data.xlsx │ ├── readme.txt │ ├── publish_tool.cmd │ └── config.json ├── .gitattributes ├── DatabaseGenerator ├── .gitignore ├── Properties │ ├── PublishProfiles │ │ ├── .gitignore │ │ ├── osx-x64.pubxml │ │ ├── LinuxX64.pubxml │ │ ├── osx-arm64.pubxml │ │ └── WinX64.pubxml │ └── launchSettings.json ├── Models │ ├── WeightedItem.cs │ ├── SubCategoryLink.cs │ ├── SubCategory.cs │ ├── Category.cs │ ├── GeoArea.cs │ ├── CustomerCluster.cs │ ├── Order.cs │ ├── OrderRow.cs │ ├── CurrencyExchange.cs │ ├── Store.cs │ ├── Sale.cs │ ├── Product.cs │ ├── DateExtended.cs │ └── Customer.cs ├── assemblyinfo.cs ├── DataWriter │ ├── IDataWriter.cs │ ├── Csv │ │ ├── CsvCurrencyExchange.cs │ │ ├── CsvStore.cs │ │ ├── CsvProduct.cs │ │ ├── CsvDateExtended.cs │ │ ├── CsvCustomer.cs │ │ └── CSVWriter.cs │ └── Parquet │ │ ├── DeltaSchema.cs │ │ ├── ParquetCurrencyExchange.cs │ │ ├── ParquetOrder.cs │ │ ├── ParquetOrderRow.cs │ │ ├── ParquetStore.cs │ │ ├── ParquetProduct.cs │ │ ├── ParquetSale.cs │ │ ├── ParquetDateExtended.cs │ │ ├── ParquetCustomer.cs │ │ └── ParquetWriter.cs ├── MyMath.cs ├── DoublesArray.cs ├── Consts.cs ├── Logger.cs ├── Fast │ ├── ProductsFast.cs │ └── CustomerListFast.cs ├── DatabaseGenerator.csproj ├── Utility.cs ├── MyRnd.cs ├── Converter2OUT.cs ├── Program.cs ├── Config.cs ├── ECBExchangesReader.cs ├── CustomerRaw2Csv.cs └── ExcelReader.cs ├── _test_data ├── .gitignore └── IN │ ├── data_test.xlsx │ └── 
config_test.json ├── docs └── imgs │ ├── fabric_01.png │ ├── schema_sales.svg │ └── schema_orders.svg ├── LICENSE ├── ContosoDGV2.sln ├── README.md └── .gitignore /scripts/sql/dump/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/sql/inputcsv/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.csv text eol=crlf 2 | -------------------------------------------------------------------------------- /DatabaseGenerator/.gitignore: -------------------------------------------------------------------------------- 1 | assemblygithash.cs -------------------------------------------------------------------------------- /DatabaseGenerator/Properties/PublishProfiles/.gitignore: -------------------------------------------------------------------------------- 1 | !*.pubxml -------------------------------------------------------------------------------- /_test_data/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | OUT/**/*.* 3 | CACHE/**/*.* 4 | TEMP/**/*.* 5 | -------------------------------------------------------------------------------- /scripts/build_data/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | out/**/*.* 3 | cache/**/*.* 4 | 5 | !bin 6 | -------------------------------------------------------------------------------- /scripts/sql/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | inputcsv/**/*.* 3 | 4 | *.bak 5 | *.7z 6 | *.7z.* 7 | 8 | -------------------------------------------------------------------------------- /docs/imgs/fabric_01.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/sql-bi/Contoso-Data-Generator-V2/HEAD/docs/imgs/fabric_01.png -------------------------------------------------------------------------------- /scripts/build_data/make_tool.cmd: -------------------------------------------------------------------------------- 1 | dotnet build ../../ContosoDGV2.sln --no-incremental --configuration Release 2 | pause -------------------------------------------------------------------------------- /_test_data/IN/data_test.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sql-bi/Contoso-Data-Generator-V2/HEAD/_test_data/IN/data_test.xlsx -------------------------------------------------------------------------------- /scripts/build_data/bin/7za.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sql-bi/Contoso-Data-Generator-V2/HEAD/scripts/build_data/bin/7za.exe -------------------------------------------------------------------------------- /scripts/build_data/data.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sql-bi/Contoso-Data-Generator-V2/HEAD/scripts/build_data/data.xlsx -------------------------------------------------------------------------------- /scripts/publish_tool/data.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sql-bi/Contoso-Data-Generator-V2/HEAD/scripts/publish_tool/data.xlsx -------------------------------------------------------------------------------- /scripts/sql/TrimTables.sql: -------------------------------------------------------------------------------- 1 | DECLARE @LOGLINE NVARCHAR(10) = Char(10) + Char(10) + '#### ' 2 | 3 | PRINT @LOGLINE + 'Trim Customer' 4 | DELETE FROM [Data].Customer 5 | WHERE CustomerKey 
NOT IN (SELECT DISTINCT CustomerKey FROM [Data].Sales ) 6 | AND CustomerKey NOT IN (SELECT DISTINCT CustomerKey FROM [Data].Orders ) 7 | GO 8 | -------------------------------------------------------------------------------- /DatabaseGenerator/Models/WeightedItem.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | 3 | 4 | namespace DatabaseGenerator.Models 5 | { 6 | public abstract class WeightedItem 7 | { 8 | public double[] Weights { get; set; } // from data file 9 | public double[] DaysWeight { get; set; } // calculated 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /DatabaseGenerator/Models/SubCategoryLink.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | 3 | 4 | namespace DatabaseGenerator.Models 5 | { 6 | public class SubCategoryLink 7 | { 8 | public int SubCategoryID { get; set; } 9 | public int LinkedSubCategoryID { get; set; } 10 | public double PerCent { get; set; } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /DatabaseGenerator/Models/SubCategory.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | 3 | 4 | namespace DatabaseGenerator.Models 5 | { 6 | public class SubCategory : WeightedItem 7 | { 8 | public int SubCategoryID { get; set; } 9 | public int CategoryID { get; set; } 10 | public string SubCategoryName { get; set; } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /DatabaseGenerator/assemblyinfo.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Text; 4 | 5 | 6 | [assembly: System.Reflection.AssemblyCompanyAttribute("DatabaseGeneratorV2")] 7 | [assembly: System.Reflection.AssemblyTitleAttribute("DatabaseGeneratorV2")] 8 | 
[assembly: System.Reflection.AssemblyVersionAttribute("2.0.1.0")] 9 | -------------------------------------------------------------------------------- /DatabaseGenerator/Models/Category.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | 3 | 4 | namespace DatabaseGenerator.Models 5 | { 6 | public class Category : WeightedItem 7 | { 8 | public int CategoryID { get; set; } 9 | 10 | public string CategoryName { get; set; } 11 | 12 | public double[] PricePerCent { get; set; } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /scripts/publish_tool/readme.txt: -------------------------------------------------------------------------------- 1 | Contoso Data Generator V2 2 | 3 | Contoso Data Generator is a tool for generating sample data with randomly generated orders for the Contoso data model to provide demo data. Generated data can be imported into Power BI, Fabric, and other platforms. 4 | 5 | Documentation: https://docs.sqlbi.com/contoso-data-generator/ 6 | Project: https://github.com/sql-bi/Contoso-Data-Generator-V2 7 | -------------------------------------------------------------------------------- /DatabaseGenerator/Models/GeoArea.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | 3 | 4 | namespace DatabaseGenerator.Models 5 | { 6 | public class GeoArea : WeightedItem 7 | { 8 | public int GeoAreaID { get; set; } 9 | public string CountryCode { get; set; } 10 | public string Country { get; set; } 11 | public string StateCode { get; set; } 12 | public string StateLongName { get; set; } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /DatabaseGenerator/Models/CustomerCluster.cs: -------------------------------------------------------------------------------- 1 | using DatabaseGenerator.Fast; 2 | using System; 3 | 4 | 5 | namespace DatabaseGenerator.Models 6 | 
{ 7 | public class CustomerCluster 8 | { 9 | public int ClusterID { get; set; } 10 | public double OrdersWeight { get; set; } 11 | public double CustomersWeight { get; set; } 12 | 13 | 14 | public CustomerListFast CustomersFast { get; set; } 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /DatabaseGenerator/Models/Order.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | 4 | 5 | namespace DatabaseGenerator.Models 6 | { 7 | 8 | public class Order 9 | { 10 | public long OrderID { get; set; } 11 | public int CustomerID { get; set; } 12 | public int StoreID { get; set; } 13 | public DateTime DT { get; set; } 14 | public DateTime DeliveryDate { get; set; } 15 | public string CurrencyCode { get; set; } 16 | public List Rows { get; set; } 17 | } 18 | 19 | } 20 | -------------------------------------------------------------------------------- /DatabaseGenerator/Properties/launchSettings.json: -------------------------------------------------------------------------------- 1 | { 2 | "profiles": { 3 | "DatabaseGenerator": { 4 | "commandName": "Project", 5 | "commandLineArgs": "..\\..\\..\\..\\_test_data\\in\\config_test.json ..\\..\\..\\..\\_test_data\\in\\data_test.xlsx ..\\..\\..\\..\\_test_data\\out ..\\..\\..\\..\\_test_data\\cache param:XOrdersCount=88767 param:XCutDateBefore=2020-06-07 param:XCutDateAfterx=2020-09-27 param:XCustomerPercentage=0.56" 6 | }, 7 | "NewProfile2": { 8 | "commandName": "Project", 9 | "commandLineArgs": "aaaa" 10 | } 11 | } 12 | } -------------------------------------------------------------------------------- /DatabaseGenerator/Models/OrderRow.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | 3 | 4 | namespace DatabaseGenerator.Models 5 | { 6 | 7 | public class OrderRow 8 | { 9 | public int RowNumber { get; set; } // required? 
10 | public int CategoryID { get; internal set; } 11 | public int SubCategoryID { get; internal set; } 12 | public int ProductID { get; set; } 13 | public int Quantity { get; set; } 14 | public decimal UnitPrice { get; set; } 15 | public decimal NetPrice { get; set; } 16 | public decimal UnitCost { get; set; } 17 | } 18 | 19 | } 20 | -------------------------------------------------------------------------------- /DatabaseGenerator/Models/CurrencyExchange.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using System.Threading.Tasks; 6 | 7 | namespace DatabaseGenerator.Models 8 | { 9 | public class CurrencyExchange 10 | { 11 | public DateTime Date { get; set; } 12 | public string FromCurrency { get; set; } 13 | public string ToCurrency { get; set; } 14 | public decimal Exchange { get; set; } 15 | 16 | public override string ToString() 17 | { 18 | return Date + " : " + FromCurrency + " : " + ToCurrency + " : " + Exchange; 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /DatabaseGenerator/Properties/PublishProfiles/osx-x64.pubxml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | Release 8 | Any CPU 9 | bin\publish\osx-x64\ 10 | FileSystem 11 | <_TargetId>Folder 12 | net8.0 13 | osx-x64 14 | true 15 | true 16 | false 17 | 18 | -------------------------------------------------------------------------------- /DatabaseGenerator/Properties/PublishProfiles/LinuxX64.pubxml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | Release 8 | Any CPU 9 | bin\publish\linux-x64\ 10 | FileSystem 11 | <_TargetId>Folder 12 | net8.0 13 | linux-x64 14 | true 15 | true 16 | false 17 | 18 | -------------------------------------------------------------------------------- 
/DatabaseGenerator/Properties/PublishProfiles/osx-arm64.pubxml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | Release 8 | Any CPU 9 | bin\publish\osx-arm64\ 10 | FileSystem 11 | <_TargetId>Folder 12 | net8.0 13 | osx-arm64 14 | true 15 | true 16 | false 17 | 18 | -------------------------------------------------------------------------------- /DatabaseGenerator/Properties/PublishProfiles/WinX64.pubxml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | Release 8 | Any CPU 9 | bin\publish\win-x64\ 10 | FileSystem 11 | <_TargetId>Folder 12 | net8.0 13 | win-x64 14 | true 15 | true 16 | false 17 | false 18 | 19 | -------------------------------------------------------------------------------- /DatabaseGenerator/Models/Store.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | 3 | 4 | namespace DatabaseGenerator.Models 5 | { 6 | public class Store 7 | { 8 | public int StoreID { get; set; } 9 | public int GeoAreaID { get; set; } 10 | public string CountryCode { get; set; } // filled from GeoArea "table" 11 | public DateTime? OpenDate { get; set; } 12 | public DateTime? CloseDate { get; set; } 13 | public int StoreCode { get; set; } 14 | public string Description { get; set; } 15 | public int? 
SquareMeters { get; set; } 16 | public string Status { get; set; } 17 | 18 | // -- added at runtime 19 | public string Country { get; set; } 20 | public string State { get; set; } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /DatabaseGenerator/Models/Sale.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | 3 | 4 | namespace DatabaseGenerator.Models 5 | { 6 | public class Sale 7 | { 8 | public long OrderKey { get; set; } 9 | public int LineNumber { get; set; } 10 | public DateTime OrderDate { get; set; } 11 | public DateTime DeliveryDate { get; set; } 12 | public int CustomerKey { get; set; } 13 | public int StoreKey { get; set; } 14 | public int ProductKey { get; set; } 15 | public int Quantity { get; set; } 16 | public decimal UnitPrice { get; set; } 17 | public decimal NetPrice { get; set; } 18 | public decimal UnitCost { get; set; } 19 | public string CurrencyCode { get; set; } 20 | public decimal ExchangeRate { get; set; } 21 | 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /scripts/build_data/build_all.cmd: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | ECHO START 3 | 4 | CALL :buildandzip csv-10k 5 | CALL :buildandzip csv-100k 6 | CALL :buildandzip csv-1m 7 | CALL :buildandzip csv-10m 8 | CALL :buildandzip csv-100m -v500m 9 | CALL :buildandzip delta-10k 10 | CALL :buildandzip delta-100k 11 | CALL :buildandzip delta-1m 12 | CALL :buildandzip delta-10m 13 | CALL :buildandzip delta-100m -v500m 14 | CALL :buildandzip parquet-10k 15 | CALL :buildandzip parquet-100k 16 | CALL :buildandzip parquet-1m 17 | CALL :buildandzip parquet-10m 18 | CALL :buildandzip parquet-100m -v500m 19 | GOTO :EOF 20 | 21 | :buildandzip 22 | ECHO BUILD DATA : %~1 %~2 23 | IF EXIST "out\%~1.7z.*" DEL out\%~1.7z.* 24 | CALL build_single.cmd %~1 25 | bin\7za.exe a out\%~1.7z 
out\%~1\. -x!_log.log %~2 26 | EXIT /B 0 -------------------------------------------------------------------------------- /DatabaseGenerator/DataWriter/IDataWriter.cs: -------------------------------------------------------------------------------- 1 | using DatabaseGenerator.Models; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Text; 5 | using System.Threading.Tasks; 6 | 7 | 8 | namespace DatabaseGenerator.DataWriter 9 | { 10 | 11 | public interface IDataWriter 12 | { 13 | void Init(); 14 | 15 | Task WriteOrderWithRows(Order order, 16 | IEnumerable sales); 17 | 18 | Task WriteStaticData(IEnumerable customers, 19 | IEnumerable stores, 20 | IEnumerable products, 21 | IEnumerable dates, 22 | IEnumerable currencyExchanges); 23 | 24 | void Close(); 25 | 26 | } 27 | 28 | } 29 | -------------------------------------------------------------------------------- /DatabaseGenerator/Models/Product.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | 3 | 4 | namespace DatabaseGenerator.Models 5 | { 6 | 7 | public class Product : WeightedItem 8 | { 9 | public int ProductID { get; set; } 10 | public string ProductName { get; set; } 11 | public int SubCategoryID { get; set; } 12 | public double Price { get; set; } 13 | public double Cost { get; set; } 14 | public string ProductCode { get; set; } 15 | public string Manufacturer { get; set; } 16 | public string Brand { get; set; } 17 | public string Color { get; set; } 18 | public string WeightUnit { get; set; } 19 | public decimal? 
Weight { get; set; } 20 | 21 | // --- added at runtime 22 | public int CategoryID { get; set; } 23 | public string CategoryName { get; set; } 24 | public string SubCategoryName { get; set; } 25 | } 26 | 27 | } 28 | -------------------------------------------------------------------------------- /DatabaseGenerator/MyMath.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Text; 4 | 5 | namespace DatabaseGenerator 6 | { 7 | 8 | internal class MyMath 9 | { 10 | public static double[] Interpolate(int n, double start, double end, double[] interpolationPoints, double[] interpolationValues) 11 | { 12 | // Create a sequence of equally spaced items 13 | var points = MathNet.Numerics.Generate.LinearSpaced(n, start, end); 14 | 15 | // Interpolate n elements 16 | var interpolation = MathNet.Numerics.Interpolate.CubicSpline(interpolationPoints, interpolationValues); 17 | var outVect = new double[n]; 18 | for (int i = 0; i < n; i++) 19 | { 20 | outVect[i] = interpolation.Interpolate(points[i]); 21 | } 22 | 23 | return outVect; 24 | } 25 | 26 | } 27 | 28 | } 29 | -------------------------------------------------------------------------------- /DatabaseGenerator/DataWriter/Csv/CsvCurrencyExchange.cs: -------------------------------------------------------------------------------- 1 | using DatabaseGenerator.Models; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Linq; 5 | using System.Text; 6 | using System.Threading.Tasks; 7 | 8 | namespace DatabaseGenerator.DataWriter.Csv 9 | { 10 | public class CsvCurrencyExchange 11 | { 12 | public DateTime Date { get; set; } 13 | public string FromCurrency { get; set; } 14 | public string ToCurrency { get; set; } 15 | public decimal Exchange { get; set; } 16 | 17 | 18 | public static CsvCurrencyExchange FromCurrencyExchange(CurrencyExchange ce) 19 | { 20 | return new CsvCurrencyExchange() 21 | { 22 | Date = 
ce.Date, 23 | FromCurrency = ce.FromCurrency, 24 | ToCurrency = ce.ToCurrency, 25 | Exchange = ce.Exchange, 26 | }; 27 | } 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /DatabaseGenerator/DoublesArray.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | 6 | namespace DatabaseGenerator 7 | { 8 | public class DoublesArray 9 | { 10 | public double Sum { get; } 11 | 12 | public int Length { get { return Values.Length; } } 13 | 14 | public double[] Values { get; } 15 | 16 | public double[] ProgressiveSum { get; } 17 | 18 | public double this[int index] { get { return Values[index]; } } 19 | 20 | 21 | public DoublesArray(double[] values) 22 | { 23 | Values = values; 24 | Sum = values.Sum(); 25 | 26 | ProgressiveSum = new double[values.Length]; 27 | double s = 0; 28 | for (int i = 0; i < values.Length; i++) 29 | { 30 | s += values[i]; 31 | ProgressiveSum[i] = s; 32 | } 33 | } 34 | 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /DatabaseGenerator/Models/DateExtended.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | 3 | 4 | namespace DatabaseGenerator.Models 5 | { 6 | public class DateExtended 7 | { 8 | public DateTime Date { get; set; } 9 | public string DateKey { get; set; } 10 | public int Year { get; set; } 11 | public string YearQuarter { get; set; } 12 | public int YearQuarterNumber { get; set; } 13 | public string Quarter { get; set; } 14 | public string YearMonth { get; set; } 15 | public string YearMonthShort { get; set; } 16 | public int YearMonthNumber { get; set; } 17 | public string Month { get; set; } 18 | public string MonthShort { get; set; } 19 | public int MonthNumber { get; set; } 20 | public string DayofWeek { get; set; } 21 | public string DayofWeekShort { 
get; set; } 22 | public int DayofWeekNumber { get; set; } 23 | public int WorkingDay { get; set; } 24 | public int WorkingDayNumber { get; set; } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /DatabaseGenerator/Consts.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using System.Threading.Tasks; 6 | 7 | namespace DatabaseGenerator 8 | { 9 | 10 | public static class Consts 11 | { 12 | #pragma warning disable format 13 | 14 | public const string DOWNLOAD_BASE_URL = "https://github.com/sql-bi/Contoso-Data-Generator-V2-Data/releases/download/static-files/"; 15 | 16 | public const string ORDERS = "orders"; 17 | public const string ORDERROWS = "orderrows"; 18 | public const string SALES = "sales"; 19 | public const string CURREXCHS = "currencyexchange"; 20 | public const string CUSTOMERS = "customer"; 21 | public const string DATES = "date"; 22 | public const string PRODUCTS = "product"; 23 | public const string STORES = "store"; 24 | 25 | public const string FILE_ECB_EXCH_CSV = "ECB_eurofxref-hist.csv"; 26 | 27 | #pragma warning restore format 28 | } 29 | 30 | } 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 SQLBI 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and 
this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /scripts/sql/ImportDataSales.sql: -------------------------------------------------------------------------------- 1 | DECLARE @LOGLINE NVARCHAR(10) = Char(10) + Char(10) + '#### ' 2 | 3 | DECLARE @csvSales NVARCHAR(500) 4 | DECLARE @sqlTxt VARCHAR(max) 5 | 6 | PRINT @LOGLINE + 'CD: ' + '$(varCD)' 7 | 8 | SET @csvSales = '$(varCD)' + '\inputcsv\sales.csv' 9 | 10 | 11 | PRINT @LOGLINE + 'Load data' 12 | PRINT @LOGLINE + @csvSales; EXEC ('BULK INSERT [Data].[Sales] FROM ''' + @csvSales + ''' WITH (CODEPAGE = ''65001'', TABLOCK, FORMAT=''CSV'', FIRSTROW=2, FIELDTERMINATOR ='','' )'); 13 | 14 | PRINT @LOGLINE + 'Shrink database' 15 | DBCC SHRINKDATABASE(0) 16 | 17 | PRINT @LOGLINE + 'DB space' 18 | EXEC sp_spaceused 19 | 20 | PRINT @LOGLINE + 'Count records' 21 | SELECT '[CurrencyExchange]', COUNT(1) FROM [Data].[CurrencyExchange] 22 | UNION SELECT '[Customer]', COUNT(1) FROM [Data].[Customer] 23 | UNION SELECT '[Date]', COUNT(1) FROM [Data].[Date] 24 | UNION SELECT '[Product]', COUNT(1) FROM [Data].[Product] 25 | UNION SELECT '[Store]', COUNT(1) FROM [Data].[Store] 26 | UNION SELECT '[Sales]', COUNT(1) FROM [Data].[Sales] 27 | 28 | -------------------------------------------------------------------------------- /DatabaseGenerator/Logger.cs: 
-------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.IO; 4 | using System.Text; 5 | 6 | namespace DatabaseGenerator 7 | { 8 | public class Logger 9 | { 10 | private static object _multiThreadLock = new object(); 11 | private static StreamWriter _streamWriter; 12 | private static DateTime _previousDT = DateTime.UtcNow; 13 | 14 | public static void Init(string fileName) 15 | { 16 | _streamWriter = new StreamWriter(fileName); 17 | _streamWriter.AutoFlush = true; // because this is a log file 18 | } 19 | 20 | public static void Info(string msg) 21 | { 22 | lock (_multiThreadLock) 23 | { 24 | double elapsed = DateTime.UtcNow.Subtract(_previousDT).TotalSeconds; 25 | string txt = $"{DateTime.UtcNow.ToString("yyyyMMdd HH:mm:ss")} | {elapsed.ToString("0.00")} > {msg}"; 26 | Console.WriteLine(txt); 27 | _streamWriter.WriteLine(txt); 28 | _streamWriter.Flush(); 29 | _previousDT = DateTime.UtcNow; 30 | } 31 | } 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /DatabaseGenerator/Fast/ProductsFast.cs: -------------------------------------------------------------------------------- 1 | using DatabaseGenerator.Models; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Linq; 5 | using System.Text; 6 | 7 | namespace DatabaseGenerator.Fast 8 | { 9 | 10 | public class ProductsFast 11 | { 12 | 13 | public List Products { get; set; } 14 | 15 | private Dictionary> _groupingBySubCategoryID; 16 | 17 | 18 | public void IndexData() 19 | { 20 | _groupingBySubCategoryID = new Dictionary>(); 21 | 22 | var q = from p in Products group p by p.SubCategoryID into newGroup orderby newGroup.Key select newGroup; 23 | 24 | foreach (var item in q) 25 | { 26 | _groupingBySubCategoryID.Add(item.Key, item.ToList()); 27 | } 28 | } 29 | 30 | 31 | public List GetProductsBySubCategoryID(int subcategoryID) 32 | { 33 | if 
(!_groupingBySubCategoryID.ContainsKey(subcategoryID)) 34 | return new List(); 35 | 36 | return _groupingBySubCategoryID[subcategoryID]; 37 | } 38 | 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /DatabaseGenerator/DatabaseGenerator.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | net8.0 6 | false 7 | true 8 | false 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /DatabaseGenerator/Utility.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.IO; 4 | using System.Linq; 5 | using System.Text; 6 | using System.Threading.Tasks; 7 | 8 | namespace DatabaseGenerator 9 | { 10 | 11 | public static class Utility 12 | { 13 | 14 | public static void DirectoryDeleteIfExists(string path, bool recursive) 15 | { 16 | var di = new DirectoryInfo(path); 17 | 18 | if (di.Exists) 19 | { 20 | di.Delete(recursive); 21 | } 22 | } 23 | 24 | public static void DeleteFiles(string folderPath, string searchPattern, bool resursively) 25 | { 26 | Directory 27 | .GetFiles(folderPath, searchPattern, new EnumerationOptions() { RecurseSubdirectories = resursively }) 28 | .ToList() 29 | .ForEach(x => File.Delete(x)); 30 | } 31 | 32 | 33 | //public static void DeleteFilesRecursively(string folderPath, string searchPattern) 34 | //{ 35 | // Directory 36 | // .GetFiles(folderPath, searchPattern, new EnumerationOptions() { RecurseSubdirectories = true }) 37 | // .ToList() 38 | // .ForEach(x => File.Delete(x)); 39 | //} 40 | 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /scripts/sql/ImportDataOrders.sql: -------------------------------------------------------------------------------- 1 | DECLARE @LOGLINE NVARCHAR(10) = 
Char(10) + Char(10) + '#### ' 2 | 3 | DECLARE @csvOrders NVARCHAR(500) 4 | DECLARE @csvOrderRows NVARCHAR(500) 5 | DECLARE @sqlTxt VARCHAR(max) 6 | 7 | PRINT @LOGLINE + 'CD: ' + '$(varCD)' 8 | 9 | SET @csvOrders = '$(varCD)' + '\inputcsv\orders.csv' 10 | SET @csvOrderRows = '$(varCD)' + '\inputcsv\orderrows.csv' 11 | 12 | 13 | PRINT @LOGLINE + 'Load data' 14 | PRINT @LOGLINE + @csvOrders; EXEC ('BULK INSERT [Data].[Orders] FROM ''' + @csvOrders + ''' WITH (CODEPAGE = ''65001'', TABLOCK, FORMAT=''CSV'', FIRSTROW=2, FIELDTERMINATOR ='','' )'); 15 | PRINT @LOGLINE + @csvOrderRows; EXEC ('BULK INSERT [Data].[OrderRows] FROM ''' + @csvOrderRows + ''' WITH (CODEPAGE = ''65001'', TABLOCK, FORMAT=''CSV'', FIRSTROW=2, FIELDTERMINATOR ='','' )'); 16 | 17 | PRINT @LOGLINE + 'Shrink database' 18 | DBCC SHRINKDATABASE(0) 19 | 20 | PRINT @LOGLINE + 'DB space' 21 | EXEC sp_spaceused 22 | 23 | PRINT @LOGLINE + 'Count records' 24 | SELECT '[CurrencyExchange]', COUNT(1) FROM [Data].[CurrencyExchange] 25 | UNION SELECT '[Customer]', COUNT(1) FROM [Data].[Customer] 26 | UNION SELECT '[Date]', COUNT(1) FROM [Data].[Date] 27 | UNION SELECT '[Product]', COUNT(1) FROM [Data].[Product] 28 | UNION SELECT '[Store]', COUNT(1) FROM [Data].[Store] 29 | UNION SELECT '[Orders]', COUNT(1) FROM [Data].[Orders] 30 | UNION SELECT '[OrderRows]', COUNT(1) FROM [Data].[OrderRows] 31 | 32 | -------------------------------------------------------------------------------- /ContosoDGV2.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.9.34701.34 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DatabaseGenerator", "DatabaseGenerator\DatabaseGenerator.csproj", "{4391CC1A-71BD-4826-A40B-FA304D35176F}" 7 | EndProject 8 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", 
"Solution Items", "{4C8AFF47-8F42-421F-B077-F1DC116929A5}" 9 | ProjectSection(SolutionItems) = preProject 10 | _test_data\IN\config_test.json = _test_data\IN\config_test.json 11 | README.md = README.md 12 | EndProjectSection 13 | EndProject 14 | Global 15 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 16 | Debug|Any CPU = Debug|Any CPU 17 | Release|Any CPU = Release|Any CPU 18 | EndGlobalSection 19 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 20 | {4391CC1A-71BD-4826-A40B-FA304D35176F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 21 | {4391CC1A-71BD-4826-A40B-FA304D35176F}.Debug|Any CPU.Build.0 = Debug|Any CPU 22 | {4391CC1A-71BD-4826-A40B-FA304D35176F}.Release|Any CPU.ActiveCfg = Release|Any CPU 23 | {4391CC1A-71BD-4826-A40B-FA304D35176F}.Release|Any CPU.Build.0 = Release|Any CPU 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | GlobalSection(ExtensibilityGlobals) = postSolution 29 | SolutionGuid = {5AB20085-94C2-4444-B1FC-79332D59165F} 30 | EndGlobalSection 31 | EndGlobal 32 | -------------------------------------------------------------------------------- /DatabaseGenerator/DataWriter/Parquet/DeltaSchema.cs: -------------------------------------------------------------------------------- 1 | using DatabaseGenerator.DataWriter.Parquet; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Linq; 5 | using System.Text; 6 | using System.Text.Json.Serialization; 7 | using System.Threading.Tasks; 8 | 9 | 10 | namespace DatabaseGenerator.DataWriter.Parquet 11 | { 12 | 13 | // ----------------- 14 | // Required for serialization because JsonSerializerIsReflectionEnabledByDefault = false 15 | [JsonSerializable(typeof(DeltaSchema))] 16 | internal partial class DeltaSchemaSerializerContext : JsonSerializerContext 17 | { 18 | } 19 | // ----------------- 20 | 21 | 22 | // --------------------------------------- 23 | // Delta schema json 
serialization classes 24 | // --------------------------------------- 25 | 26 | public class DeltaSchema 27 | { 28 | public string type { get; set; } 29 | public List fields { get; set; } 30 | } 31 | 32 | public class DeltaField 33 | { 34 | public string name { get; set; } 35 | public string type { get; set; } 36 | public bool nullable { get; set; } 37 | public DeltaMetadata metadata { get; set; } 38 | 39 | public static DeltaField GetInstance(string nameValue, string typeValue, bool nullableValue) 40 | { 41 | return new DeltaField() { name = nameValue, type = typeValue, nullable = nullableValue, metadata = new DeltaMetadata() }; 42 | } 43 | } 44 | 45 | public class DeltaMetadata 46 | { 47 | } 48 | 49 | } 50 | -------------------------------------------------------------------------------- /DatabaseGenerator/DataWriter/Csv/CsvStore.cs: -------------------------------------------------------------------------------- 1 | using DatabaseGenerator.Models; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Linq; 5 | using System.Text; 6 | using System.Threading.Tasks; 7 | 8 | 9 | namespace DatabaseGenerator.DataWriter.Csv 10 | { 11 | 12 | internal class CsvStore 13 | { 14 | 15 | // --------------------------------------------------------- 16 | public int StoreKey { get; set; } 17 | public int StoreCode { get; set; } 18 | public int GeoAreaKey { get; set; } 19 | public string CountryCode { get; set; } 20 | public string CountryName { get; set; } 21 | public string State { get; set; } 22 | public DateTime? OpenDate { get; set; } 23 | public DateTime? CloseDate { get; set; } 24 | public string Description { get; set; } 25 | public int? 
SquareMeters { get; set; } 26 | public string Status { get; set; } 27 | // --------------------------------------------------------- 28 | 29 | 30 | public static CsvStore GetFromStore(Store s) 31 | { 32 | return new CsvStore() 33 | { 34 | StoreKey = s.StoreID, 35 | StoreCode = s.StoreCode, 36 | GeoAreaKey = s.GeoAreaID, 37 | CountryCode = s.CountryCode, 38 | CountryName = s.Country, 39 | State = s.State, 40 | OpenDate = s.OpenDate, 41 | CloseDate = s.CloseDate, 42 | Description = s.Description, 43 | SquareMeters = s.SquareMeters, 44 | Status = s.Status, 45 | }; 46 | } 47 | 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /DatabaseGenerator/Fast/CustomerListFast.cs: -------------------------------------------------------------------------------- 1 | using DatabaseGenerator.Models; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Text; 5 | 6 | 7 | namespace DatabaseGenerator.Fast 8 | { 9 | /// <summary>Customer list with a lookup index keyed by GeoAreaID.</summary> 10 | public class CustomerListFast 11 | { 12 | // GeoAreaID -> customers having that GeoAreaID; filled by CreateIndexes() 13 | private Dictionary<int, List<Customer>> _customersByGeoAreaID; 14 | 15 | 16 | private List<Customer> _customerList { get; set; } 17 | 18 | /// <summary>Number of customers in the wrapped list.</summary> 19 | public int Count 20 | { 21 | get { return _customerList.Count; } 22 | } 23 | 24 | /// <summary>Keeps a reference to the given list (no copy is made) and builds the GeoAreaID index.</summary> 25 | public CustomerListFast(List<Customer> customers) 26 | { 27 | _customerList = customers; 28 | CreateIndexes(); 29 | } 30 | 31 | /// <summary>Returns the index's own list of customers for geoAreaID, or a new empty list when that key is not indexed.</summary> 32 | public List<Customer> FindByGeoAreaID(int geoAreaID) 33 | { 34 | if (_customersByGeoAreaID.TryGetValue(geoAreaID, out var customers)) 35 | { 36 | return customers; 37 | } 38 | else 39 | { 40 | return new List<Customer>(); 41 | } 42 | } 43 | 44 | /// <summary>Groups _customerList by GeoAreaID, using TryGetValue instead of ContainsKey plus indexer.</summary> 45 | private void CreateIndexes() 46 | { 47 | _customersByGeoAreaID = new Dictionary<int, List<Customer>>(); 48 | 49 | for (int i = 0; i < _customerList.Count; i++) 50 | { 51 | var customer = _customerList[i]; 52 | 53 | if (!_customersByGeoAreaID.TryGetValue(customer.GeoAreaID, out var bucket)) 54 | { 55 | bucket = new List<Customer>(); _customersByGeoAreaID.Add(customer.GeoAreaID, bucket); 56 | } 57 | 58 | bucket.Add(customer); 59 | } 60 | } 61 | 62 | } 63 | 64 | } 65 | 
-------------------------------------------------------------------------------- /scripts/sql/ImportDataCommon.sql: -------------------------------------------------------------------------------- 1 | DECLARE @LOGLINE NVARCHAR(10) = Char(10) + Char(10) + '#### ' 2 | 3 | DECLARE @csvCurrExch NVARCHAR(500) 4 | DECLARE @csvCustomer NVARCHAR(500) 5 | DECLARE @csvDate NVARCHAR(500) 6 | DECLARE @csvProduct NVARCHAR(500) 7 | DECLARE @csvStore NVARCHAR(500) 8 | DECLARE @sqlTxt VARCHAR(max) 9 | 10 | PRINT @LOGLINE + 'CD: ' + '$(varCD)' 11 | 12 | SET @csvCurrExch = '$(varCD)' + '\inputcsv\currencyexchange.csv' 13 | SET @csvCustomer = '$(varCD)' + '\inputcsv\customer.csv' 14 | SET @csvDate = '$(varCD)' + '\inputcsv\date.csv' 15 | SET @csvProduct = '$(varCD)' + '\inputcsv\product.csv' 16 | SET @csvStore = '$(varCD)' + '\inputcsv\store.csv' 17 | 18 | 19 | PRINT @LOGLINE + 'Load data' 20 | PRINT @LOGLINE + @csvCurrExch; EXEC ('BULK INSERT [Data].[CurrencyExchange] FROM ''' + @csvCurrExch + ''' WITH (CODEPAGE = ''65001'', TABLOCK, FORMAT=''CSV'', FIRSTROW=2, FIELDTERMINATOR ='','' )'); 21 | PRINT @LOGLINE + @csvCustomer; EXEC ('BULK INSERT [Data].[Customer] FROM ''' + @csvCustomer + ''' WITH (CODEPAGE = ''65001'', TABLOCK, FORMAT=''CSV'', FIRSTROW=2, FIELDTERMINATOR ='','' )'); 22 | PRINT @LOGLINE + @csvDate; EXEC ('BULK INSERT [Data].[Date] FROM ''' + @csvDate + ''' WITH (CODEPAGE = ''65001'', TABLOCK, FORMAT=''CSV'', FIRSTROW=2, FIELDTERMINATOR ='','' )'); 23 | PRINT @LOGLINE + @csvProduct; EXEC ('BULK INSERT [Data].[Product] FROM ''' + @csvProduct + ''' WITH (CODEPAGE = ''65001'', TABLOCK, FORMAT=''CSV'', FIRSTROW=2, FIELDTERMINATOR ='','' )'); 24 | PRINT @LOGLINE + @csvStore; EXEC ('BULK INSERT [Data].[Store] FROM ''' + @csvStore + ''' WITH (CODEPAGE = ''65001'', TABLOCK, FORMAT=''CSV'', FIRSTROW=2, FIELDTERMINATOR ='','' )'); 25 | -------------------------------------------------------------------------------- /scripts/sql/SQLBI_CreateSqlDatabases.ps1: 
-------------------------------------------------------------------------------- 1 | # 2 | # Instructions 3 | # 4 | # This script only creates the SQL Server databases that are populated by SQLBI_ALL_DB.cmd 5 | # 6 | param( 7 | # Instance of SQL Server to process the database 8 | [String]$SqlServerInstance='Demo', 9 | 10 | # LOCAL Folder for the SQL Server scripts 11 | [String]$sqlScriptsFolder=".\", 12 | 13 | # Temporary folder to store MDF/LDF/BAK/ZIP files during processing 14 | # MUST BE a physical folder accessible by SQL Server, don't use user folders! 15 | [String]$SqlDataFilesFolder='C:\Temp' 16 | ) 17 | 18 | 19 | # Include the list of rows/database name for the database to generate 20 | $databases = @() 21 | $databases += [System.Tuple]::Create( 'Contoso V2 10K' ) 22 | $databases += [System.Tuple]::Create( 'Contoso V2 100K' ) 23 | $databases += [System.Tuple]::Create( 'Contoso V2 1M' ) 24 | $databases += [System.Tuple]::Create( 'Contoso V2 10M DimRatio' ) 25 | $databases += [System.Tuple]::Create( 'Contoso V2 10M' ) 26 | $databases += [System.Tuple]::Create( 'Contoso V2 100M' ) 27 | # $databases += [System.Tuple]::Create( 'Contoso V2 1G' ) 28 | 29 | 30 | $databases | ForEach-Object { 31 | Write-Host ***************************************************************** 32 | 33 | $DatabaseName = $_.Item1 34 | 35 | Write-Host 'Executing '$_ 36 | $Filename = $sqlScriptsFolder + '\CreateDB.sql' 37 | $Original = Get-Content $Filename -raw 38 | $SqlCommand = $Original ` 39 | -replace ('#DATABASE_NAME#',$DatabaseName) ` 40 | -replace ('#SQLDATA_FOLDER#',$SqlDataFilesFolder) ` 41 | -replace ('#SQLBACKUP_FOLDER#',$SqlDataFilesFolder) 42 | 43 | Invoke-Sqlcmd -ServerInstance $SqlServerInstance -Query $SqlCommand -QueryTimeout 60000 -ConnectionTimeout 60000 44 | 45 | } -------------------------------------------------------------------------------- /scripts/sql/Sql_ImportData.cmd: -------------------------------------------------------------------------------- 1 | @ECHO 
OFF 2 | ECHO. 3 | REM Usage: Sql_ImportData.cmd [sales^|orders^|both] [Trim] - with no arguments the run mode is prompted and server/database default to Demo/ContosoV2 4 | IF "%1" == "" SET SqlServerName=Demo 5 | IF "%1" == "" SET DatabaseName=ContosoV2 6 | REM No SETLOCAL is used, so variables survive between CALLs: clear TRIMTABLES so an earlier run cannot leak its Trim flag into this one 7 | SET "TRIMTABLES=" 8 | ECHO. 9 | IF "%1" == "" ( 10 | SET /P "RUNMODE=Enter run mode (sales/orders/both): " 11 | ) ELSE ( 12 | SET RUNMODE=%1 13 | ) 14 | REM Optional second argument: the value Trim makes the script run TrimTables.sql after the import 15 | IF "%2" == "" ( 16 | ECHO No Trim 17 | ) ELSE ( 18 | SET TRIMTABLES=%2 19 | ) 20 | 21 | 22 | @ECHO. 23 | ECHO RunMode: %RUNMODE% 24 | ECHO SqlServerName: %SqlServerName% 25 | ECHO DatabaseName : %DatabaseName% 26 | ECHO TrimTables : %TRIMTABLES% 27 | 28 | @ECHO. 29 | IF "%1" == "" ( PAUSE ) 30 | 31 | 32 | REM --- Create tables --- 33 | sqlcmd -S "%SqlServerName%" -d "%DatabaseName%" -i CreateTablesCommon.sql 34 | IF "%RUNMODE%" == "sales" sqlcmd -S "%SqlServerName%" -d "%DatabaseName%" -i CreateTablesSales.sql 35 | IF "%RUNMODE%" == "orders" sqlcmd -S "%SqlServerName%" -d "%DatabaseName%" -i CreateTablesOrders.sql 36 | IF "%RUNMODE%" == "both" sqlcmd -S "%SqlServerName%" -d "%DatabaseName%" -i CreateTablesSales.sql -i CreateTablesOrders.sql 37 | 38 | REM --- Import data --- 39 | sqlcmd -S "%SqlServerName%" -d "%DatabaseName%" -i ImportDataCommon.sql -v varCD="%CD%" 40 | IF "%RUNMODE%" == "sales" sqlcmd -S "%SqlServerName%" -d "%DatabaseName%" -i ImportDataSales.sql -v varCD="%CD%" 41 | IF "%RUNMODE%" == "orders" sqlcmd -S "%SqlServerName%" -d "%DatabaseName%" -i ImportDataOrders.sql -v varCD="%CD%" 42 | IF "%RUNMODE%" == "both" sqlcmd -S "%SqlServerName%" -d "%DatabaseName%" -i ImportDataSales.sql -i ImportDataOrders.sql -v varCD="%CD%" 43 | 44 | 45 | ECHO. 46 | ECHO. 47 | IF "%TRIMTABLES%" =="Trim" sqlcmd -S "%SqlServerName%" -d "%DatabaseName%" -i TrimTables.sql -v varCD="%CD%" 48 | 49 | ECHO. 50 | ECHO ### The end ### 51 | ECHO. 
52 | 53 | IF "%1" == "" ( PAUSE ) 54 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Contoso Data Generator V2 2 | 3 | 4 | DataGenerator is a tool for generating sample data, ready to be imported into Power BI or Fabric OneLake for analysis. This is version 2, an evolution of the [original generator](https://github.com/sql-bi/Contoso-Data-Generator). 5 | 6 | If you are only interested in **ready-to-use data sets**, [download them here](https://github.com/sql-bi/Contoso-Data-Generator-V2-Data). 7 | 8 | Supported output formats: 9 | - Parquet 10 | - Delta Table (files) 11 | - CSV 12 | - CSV multi file 13 | - CSV multi file - gz compressed 14 | - SQL Server, via a bulk insert script for the generated CSV files 15 | 16 | 17 | Delta Table output can be used directly in a Fabric Lakehouse without any conversion: 18 | 19 |

20 | 21 | Data schema: 22 | 23 | | | | 24 | | -- | -- | 25 | | ![Schema Sales](docs/imgs/schema_sales.svg) | ![Schema Orders](docs/imgs/schema_orders.svg) | 26 | 27 | 28 | ## Usage overview 29 | 30 |
31 | 32 | **FULL DOCUMENTATION** available here: **[⇒ ⇒ https://docs.sqlbi.com/contoso-data-generator/ ⇐ ⇐](https://docs.sqlbi.com/contoso-data-generator/)** 33 | 34 |
35 | 36 | DataGenerator requires four mandatory elements to run, optionally followed by extra parameters: 37 | - a configuration file (JSON) 38 | - a data file (Excel) 39 | - an output folder 40 | - a cache folder 41 | - [optional parameters] 42 | 43 | ``` 44 | databasegenerator.exe configfile datafile outputfolder cachefolder [param:AAAAA=nnnn] [param:BBBBB=mmmm] 45 | ``` 46 | Example: 47 | 48 | ``` 49 | databasegenerator.exe c:\temp\config.json c:\temp\data.xlsx c:\temp\OUT\ c:\temp\CACHE\ 50 | ``` 51 | 52 | **Note**: the tool needs some files containing static data: fake customers, exchange rates, postal codes, etc. These files are downloaded over the Internet from a specific SQLBI repository and then cached. 53 | 54 |
55 |
56 |
-------------------------------------------------------------------------------- /DatabaseGenerator/MyRnd.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | 6 | 7 | namespace DatabaseGenerator 8 | { 9 | 10 | public class MyRnd 11 | { 12 | 13 | /// <summary> 14 | /// Picks a random index of weightVector with probability proportional to the weight stored at that index (weights are assumed non-negative - TODO confirm). 15 | /// </summary> 16 | public static int RandomIndexFromWeigthedDistribution(Random rnd, double[] weightVector) 17 | { 18 | //double vectorSum = weightVector.Sum(); 19 | 20 | // a plain loop is faster than LINQ Sum() 21 | double vectorSum = 0; 22 | for (int i = 0; i < weightVector.Length; i++) 23 | { 24 | vectorSum += weightVector[i]; 25 | } 26 | 27 | double x = rnd.NextDouble() * vectorSum; 28 | double sum = 0; 29 | // walk the running total and return the first index at which it reaches x 30 | for (int i = 0; i < weightVector.Length; i++) 31 | { 32 | sum = sum + weightVector[i]; 33 | if (sum >= x) 34 | { 35 | return i; 36 | } 37 | } 38 | // only reached when no running total reaches x, e.g. for an empty weightVector 39 | throw new ApplicationException("Impossible"); 40 | } 41 | 42 | 43 | /// <summary> 44 | /// Same selection as the double[] overload, but takes the total from weightVector.Sum and the running totals from weightVector.ProgressiveSum (presumably precomputed by DoublesArray, which is not visible here) instead of summing on every call. 
45 | /// </summary> 46 | public static int RandomIndexFromWeigthedDistribution(Random rnd, DoublesArray weightVector) 47 | { 48 | // This code looks ugly, but it is written for maximum performance (as far as I know) 49 | 50 | double x = rnd.NextDouble() * weightVector.Sum; 51 | int vectorLen = weightVector.Length; 52 | double[] vector = weightVector.ProgressiveSum; 53 | // NOTE(review): strict > here, while the double[] overload uses >= - confirm the difference is intended 54 | for (int i = 0; i < vectorLen; i++) 55 | { 56 | if (vector[i] > x) 57 | { 58 | return i; 59 | } 60 | } 61 | // reached only if no entry of ProgressiveSum exceeds x 62 | throw new ApplicationException("Impossible"); 63 | } 64 | 65 | 66 | } 67 | 68 | } 69 | -------------------------------------------------------------------------------- /DatabaseGenerator/DataWriter/Csv/CsvProduct.cs: -------------------------------------------------------------------------------- 1 | using DatabaseGenerator.Models; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Linq; 5 | using System.Text; 6 | using System.Threading.Tasks; 7 | 8 | 9 | namespace DatabaseGenerator.DataWriter.Csv 10 | { 11 | 12 | internal class CsvProduct 13 | { 14 | 15 | // --------------------------------------------------------- 16 | public int ProductKey { get; set; } 17 | public string ProductCode { get; set; } 18 | public string ProductName { get; set; } 19 | public string Manufacturer { get; set; } 20 | public string Brand { get; set; } 21 | public string Color { get; set; } 22 | public string WeightUnit { get; set; } 23 | public decimal? 
Weight { get; set; } 24 | public decimal Cost { get; set; } 25 | public decimal Price { get; set; } 26 | public int CategoryKey { get; set; } 27 | public string CategoryName { get; set; } 28 | public int SubCategoryKey { get; set; } 29 | public string SubCategoryName { get; set; } 30 | // --------------------------------------------------------- 31 | 32 | 33 | public static CsvProduct GetFromProduct(Product p) 34 | { 35 | return new CsvProduct() 36 | { 37 | ProductKey = p.ProductID, 38 | ProductName = p.ProductName, 39 | Price = (decimal)p.Price, 40 | Cost = (decimal)p.Cost, 41 | ProductCode = p.ProductCode, 42 | Manufacturer = p.Manufacturer, 43 | Brand = p.Brand, 44 | Color = p.Color, 45 | WeightUnit = p.WeightUnit, 46 | Weight = p.Weight, 47 | CategoryKey = p.CategoryID, 48 | CategoryName = p.CategoryName, 49 | SubCategoryKey = p.SubCategoryID, 50 | SubCategoryName = p.SubCategoryName 51 | }; 52 | } 53 | 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /DatabaseGenerator/DataWriter/Parquet/ParquetCurrencyExchange.cs: -------------------------------------------------------------------------------- 1 | using DatabaseGenerator.Models; 2 | using Parquet.Serialization.Attributes; 3 | using System; 4 | using System.Collections.Generic; 5 | using System.Linq; 6 | using System.Text; 7 | using System.Text.Json; 8 | using System.Threading.Tasks; 9 | 10 | namespace DatabaseGenerator.DataWriter.Parquet 11 | { 12 | public class ParquetCurrencyExchange 13 | { 14 | 15 | // ---------------------------------------------------------------- 16 | [ParquetTimestamp(ParquetTimestampResolution.Milliseconds)] 17 | public DateTime Date { get; set; } 18 | public string FromCurrency { get; set; } 19 | public string ToCurrency { get; set; } 20 | [ParquetDecimal(20, 5)] 21 | public decimal Exchange { get; set; } 22 | // ---------------------------------------------------------------- 23 | 24 | 25 | public static ParquetCurrencyExchange 
FromCurrencyExchange(CurrencyExchange ce) 26 | { 27 | return new ParquetCurrencyExchange() 28 | { 29 | Date = ce.Date, 30 | FromCurrency = ce.FromCurrency, 31 | ToCurrency = ce.ToCurrency, 32 | Exchange = (decimal)ce.Exchange, 33 | }; 34 | } 35 | 36 | 37 | public static string GETDeltaSchema() 38 | { 39 | var schema = new DeltaSchema() 40 | { 41 | type = "struct", 42 | fields = new List() 43 | { 44 | DeltaField.GetInstance("Date", "timestamp", false), 45 | DeltaField.GetInstance("FromCurrency", "string", true), 46 | DeltaField.GetInstance("ToCurrency", "string", true), 47 | DeltaField.GetInstance("Exchange", "decimal(20,5)", false), 48 | } 49 | }; 50 | return JsonSerializer.Serialize(schema, DeltaSchemaSerializerContext.Default.DeltaSchema); 51 | } 52 | 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /DatabaseGenerator/DataWriter/Csv/CsvDateExtended.cs: -------------------------------------------------------------------------------- 1 | using DatabaseGenerator.Models; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Linq; 5 | using System.Text; 6 | using System.Threading.Tasks; 7 | 8 | namespace DatabaseGenerator.DataWriter.Csv 9 | { 10 | internal class CsvDateExtended 11 | { 12 | public DateTime Date { get; set; } 13 | public string DateKey { get; set; } 14 | public int Year { get; set; } 15 | public string YearQuarter { get; set; } 16 | public int YearQuarterNumber { get; set; } 17 | public string Quarter { get; set; } 18 | public string YearMonth { get; set; } 19 | public string YearMonthShort { get; set; } 20 | public int YearMonthNumber { get; set; } 21 | public string Month { get; set; } 22 | public string MonthShort { get; set; } 23 | public int MonthNumber { get; set; } 24 | public string DayofWeek { get; set; } 25 | public string DayofWeekShort { get; set; } 26 | public int DayofWeekNumber { get; set; } 27 | public int WorkingDay { get; set; } 28 | public int WorkingDayNumber 
{ get; set; } 29 | 30 | 31 | public static CsvDateExtended FromDateExtended(DateExtended de) 32 | { 33 | return new CsvDateExtended() 34 | { 35 | Date = de.Date, 36 | DateKey = de.DateKey, 37 | Year = de.Year, 38 | YearQuarter = de.YearQuarter, 39 | YearQuarterNumber = de.YearQuarterNumber, 40 | Quarter = de.Quarter, 41 | YearMonth = de.YearMonth, 42 | YearMonthShort = de.YearMonthShort, 43 | YearMonthNumber = de.YearMonthNumber, 44 | Month = de.Month, 45 | MonthShort = de.MonthShort, 46 | MonthNumber = de.MonthNumber, 47 | DayofWeek = de.DayofWeek, 48 | DayofWeekShort = de.DayofWeekShort, 49 | DayofWeekNumber = de.DayofWeekNumber, 50 | WorkingDay = de.WorkingDay, 51 | WorkingDayNumber = de.WorkingDayNumber, 52 | }; 53 | } 54 | 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /scripts/sql/SQLBI_ALL_DB.cmd: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | CLS 3 | 4 | SET SqlServerName=Demo 5 | 6 | @ECHO. 7 | ECHO SqlServerName: %SqlServerName% 8 | @ECHO. 9 | PAUSE 10 | 11 | 12 | CALL :BUILDBATABASE "csv-10k" "Contoso V2 10k" "ContosoV210k.bak" "Trim" 13 | CALL :BUILDBATABASE "csv-100k" "Contoso V2 100k" "ContosoV2100k.bak" "Trim" 14 | CALL :BUILDBATABASE "csv-1m" "Contoso V2 1M" "ContosoV21M.bak" "Trim" 15 | CALL :BUILDBATABASE "csv-10m" "Contoso V2 10M DimRatio" "ContosoV210MDimRatio.bak" 16 | CALL :BUILDBATABASE "csv-10m" "Contoso V2 10M" "ContosoV210M.bak" "Trim" 17 | CALL :BUILDBATABASE "csv-100m" "Contoso V2 100M" "ContosoV2100M.bak" "Trim" "-v500m" 18 | 19 | ECHO. 20 | ECHO ######### The end ######### 21 | ECHO. 22 | PAUSE 23 | GOTO :EOF 24 | 25 | 26 | 27 | 28 | :BUILDBATABASE 29 | ECHO. 30 | ECHO BUILDBATABASE params: - data: %~1 db-name: %~2 backup-file: %~3 Trim: %~4 7zip-param: %~5 31 | 32 | SET DatabaseName=%~2 33 | 34 | @ECHO. 35 | ECHO SqlServerName: %SqlServerName% 36 | ECHO DatabaseName : %DatabaseName% 37 | @ECHO. 
38 | 39 | IF EXIST "dump\fullbackup.bak" DEL dump\fullbackup.bak 40 | IF EXIST "dump\%~3" DEL dump\%~3 41 | IF EXIST "dump\%~3.7z" DEL dump\%~3.7z 42 | IF EXIST "dump\%~3.7z.*" DEL dump\%~3.7z.* 43 | 44 | ECHO --- Copy CSV files 45 | ECHO -------------------------------------------------------------------------------- 46 | IF EXIST "inputcsv\*.csv" DEL inputcsv\*.csv 47 | COPY ..\build_data\out\%~1\*.csv inputcsv 48 | 49 | ECHO --- Fill database 50 | ECHO -------------------------------------------------------------------------------- 51 | CALL Sql_ImportData.cmd both %~4 52 | 53 | ECHO --- Backup database 54 | ECHO -------------------------------------------------------------------------------- 55 | SQLCMD -S "%SqlServerName%" -d "%DatabaseName%" -Q "select '$(varBackupFile)'; select '$(varDatabaseName)'; BACKUP DATABASE [$(varDatabaseName)] TO DISK = '$(varBackupFile)' WITH COPY_ONLY, NOFORMAT, INIT, NAME = N'ContosoDGV2 full database backup', SKIP, NOREWIND, NOUNLOAD, STATS = 10;" -v varBackupFile="%CD%\dump\fullbackup.bak" -v varDatabaseName="%DatabaseName%" 56 | RENAME dump\fullbackup.bak %~3 57 | ..\build_data\bin\7za.exe a dump\%~3.7z dump\%~3 %~5 58 | 59 | ECHO. 
60 | EXIT /B 0 61 | -------------------------------------------------------------------------------- /scripts/build_data/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "OrdersCount": 100000, 3 | "StartDT": "2014-01-01", 4 | "YearsCount": 10, 5 | "CutDateBefore": "2000-05-18", 6 | "CutDateAfter": "2050-11-12", 7 | "CustomerFakeGenerator": -100000, 8 | "CustomerPercentage": 0.15, 9 | "OutputFormat": "CSV", 10 | "SalesOrders": "BOTH", 11 | "DeltaTableOrdersPerFile": 100000, 12 | "-CsvMaxOrdersPerFile": 100000, 13 | "-CsvGzCompression": 1, 14 | 15 | 16 | "DaysWeight": { 17 | "DaysWeightConstant": false, 18 | "DaysWeightPoints": [ 0, 10, 30, 40, 70, 75, 80, 100 ], 19 | "DaysWeightValues": [ 2, 3, 5, 9, 11, 16, 14, 13 ], 20 | "DaysWeightAddSpikes": true, 21 | "WeekDaysFactor": [ 0.2, 0.8, 1, 1.3, 1.4, 1, 1.7 ], 22 | "DayRandomness": 0.05 23 | }, 24 | 25 | "OrderRowsWeights": [ 12, 9, 7, 4, 1, 1, 1 ], 26 | "OrderQuantityWeights": [ 20, 15, 12, 6, 4, 4, 4, 2, 1, 1 ], 27 | "DiscountWeights": [ 80, 5, 2, 2, 3, 4, 4, 8, 9, 10, 15, 20, 20, 15, 9 ], 28 | "OnlinePerCent": [ 0.12, 0.16, 0.20, 0.30, 0.6, 0.5 ], 29 | 30 | "DeliveryDateLambdaWeights": [ 10, 4, 3, 2, 2 ], 31 | 32 | "CountryCurrency": { 33 | "AU": "AUD", 34 | "CA": "CAD", 35 | "FR": "EUR", 36 | "DE": "EUR", 37 | "IT": "EUR", 38 | "NL": "EUR", 39 | "GB": "GBP", 40 | "US": "USD" 41 | }, 42 | 43 | "AnnualSpikes": [ 44 | { 45 | "StartDay": 40, 46 | "EndDay": 60, 47 | "Factor": 2 48 | }, 49 | { 50 | "StartDay": 70, 51 | "EndDay": 120, 52 | "Factor": 0.2 53 | }, 54 | { 55 | "StartDay": 200, 56 | "EndDay": 220, 57 | "Factor": 0.5 58 | }, 59 | { 60 | "StartDay": 345, 61 | "EndDay": 375, 62 | "Factor": 1.4 63 | } 64 | ], 65 | 66 | "OneTimeSpikes": [ 67 | { 68 | "DT1": "2016-09-25", 69 | "DT2": "2016-09-25", 70 | "Factor": 3 71 | }, 72 | { 73 | "DT1": "2018-06-01", 74 | "DT2": "2018-06-15", 75 | "Factor": 2 76 | }, 77 | { 78 | "DT1": "2020-02-28", 79 | "DT2": 
"2021-06-15", 80 | "Factor": 0.3 81 | } 82 | ], 83 | 84 | "CustomerActivity": { 85 | "StartDateWeightPoints": [ 1980, 1990, 2000, 2010 ], 86 | "StartDateWeightValues": [ 10, 10, 5, 5 ], 87 | "EndDateWeightPoints": [ 2005, 2015, 2025, 2035, 2045 ], 88 | "EndDateWeightValues": [ 5, 5, 10, 15, 25 ] 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /DatabaseGenerator/DataWriter/Parquet/ParquetOrder.cs: -------------------------------------------------------------------------------- 1 | using DatabaseGenerator.Models; 2 | using Parquet.Serialization.Attributes; 3 | using System; 4 | using System.Collections.Generic; 5 | using System.Runtime.InteropServices; 6 | using System.Text.Json; 7 | 8 | 9 | namespace DatabaseGenerator.DataWriter.Parquet 10 | { 11 | 12 | public class ParquetOrder 13 | { 14 | 15 | // --------------------------------------------------------- 16 | public long OrderKey { get; set; } 17 | public int CustomerKey { get; set; } 18 | public int StoreKey { get; set; } 19 | [ParquetTimestamp(ParquetTimestampResolution.Milliseconds)] 20 | public DateTime DT { get; set; } 21 | [ParquetTimestamp(ParquetTimestampResolution.Milliseconds)] 22 | public DateTime DeliveryDate { get; set; } 23 | public string CurrencyCode { get; set; } 24 | // --------------------------------------------------------- 25 | 26 | 27 | public static ParquetOrder FromOrder(Order order) 28 | { 29 | return new ParquetOrder() 30 | { 31 | OrderKey = order.OrderID, 32 | CustomerKey = order.CustomerID, 33 | StoreKey = order.StoreID, 34 | DT = order.DT, 35 | DeliveryDate = order.DeliveryDate, 36 | CurrencyCode = order.CurrencyCode 37 | }; 38 | } 39 | 40 | 41 | public static string GETDeltaSchema() 42 | { 43 | var schema = new DeltaSchema() 44 | { 45 | type = "struct", 46 | fields = new List() 47 | { 48 | DeltaField.GetInstance("OrderKey", "long", false), 49 | DeltaField.GetInstance("CustomerKey", "integer", false), 50 | 
DeltaField.GetInstance("StoreKey", "integer", false), 51 | DeltaField.GetInstance("DT", "timestamp", false), 52 | DeltaField.GetInstance("DeliveryDate", "timestamp", false), 53 | DeltaField.GetInstance("CurrencyCode", "string", true), 54 | } 55 | }; 56 | 57 | return JsonSerializer.Serialize(schema, DeltaSchemaSerializerContext.Default.DeltaSchema); 58 | } 59 | 60 | } 61 | 62 | } 63 | -------------------------------------------------------------------------------- /scripts/publish_tool/publish_tool.cmd: -------------------------------------------------------------------------------- 1 | DEL ..\..\DatabaseGenerator\bin\publish\*.* /Q 2 | DEL ..\..\DatabaseGenerator\bin\publish\win-x64\*.* /Q 3 | DEL ..\..\DatabaseGenerator\bin\publish\linux-x64\*.* /Q 4 | DEL ..\..\DatabaseGenerator\bin\publish\osx-x64\*.* /Q 5 | DEL ..\..\DatabaseGenerator\bin\publish\osx-arm64\*.* /Q 6 | 7 | dotnet build ../../ContosoDGV2.sln --no-incremental --configuration Release 8 | dotnet publish ../../ContosoDGV2.sln -p:PublishProfile=WinX64 9 | dotnet publish ../../ContosoDGV2.sln -p:PublishProfile=LinuxX64 10 | dotnet publish ../../ContosoDGV2.sln -p:PublishProfile=osx-x64 11 | dotnet publish ../../ContosoDGV2.sln -p:PublishProfile=osx-arm64 12 | 13 | COPY config.json ..\..\DatabaseGenerator\bin\publish\win-x64 /Y 14 | COPY data.xlsx ..\..\DatabaseGenerator\bin\publish\win-x64 /Y 15 | COPY readme.txt ..\..\DatabaseGenerator\bin\publish\win-x64 /Y 16 | 17 | COPY config.json ..\..\DatabaseGenerator\bin\publish\linux-x64 /Y 18 | COPY data.xlsx ..\..\DatabaseGenerator\bin\publish\linux-x64 /Y 19 | COPY readme.txt ..\..\DatabaseGenerator\bin\publish\linux-x64 /Y 20 | 21 | COPY config.json ..\..\DatabaseGenerator\bin\publish\osx-x64 /Y 22 | COPY data.xlsx ..\..\DatabaseGenerator\bin\publish\osx-x64 /Y 23 | COPY readme.txt ..\..\DatabaseGenerator\bin\publish\osx-x64 /Y 24 | 25 | COPY config.json ..\..\DatabaseGenerator\bin\publish\osx-arm64 /Y 26 | COPY data.xlsx 
..\..\DatabaseGenerator\bin\publish\osx-arm64 /Y 27 | COPY readme.txt ..\..\DatabaseGenerator\bin\publish\osx-arm64 /Y 28 | 29 | 30 | cd ..\..\DatabaseGenerator\bin\publish\win-x64 31 | TAR.exe -a -c -f DatabaseGenerator.winx64.zip readme.txt config.json data.xlsx DatabaseGenerator.exe 32 | COPY DatabaseGenerator.winx64.zip ..\DatabaseGenerator.winx64.zip 33 | 34 | cd ..\linux-x64 35 | TAR.exe -a -c -f DatabaseGenerator.linuxx64.zip readme.txt config.json data.xlsx DatabaseGenerator 36 | COPY DatabaseGenerator.linuxx64.zip ..\DatabaseGenerator.linuxx64.zip 37 | 38 | cd ..\osx-x64 39 | TAR.exe -a -c -f DatabaseGenerator.osx-x64.zip readme.txt config.json data.xlsx DatabaseGenerator 40 | COPY DatabaseGenerator.osx-x64.zip ..\DatabaseGenerator.osx-x64.zip 41 | 42 | cd ..\osx-arm64 43 | TAR.exe -a -c -f DatabaseGenerator.osx-arm64.zip readme.txt config.json data.xlsx DatabaseGenerator 44 | COPY DatabaseGenerator.osx-arm64.zip ..\DatabaseGenerator.osx-arm64.zip 45 | 46 | 47 | PAUSE -------------------------------------------------------------------------------- /_test_data/IN/config_test.json: -------------------------------------------------------------------------------- 1 | { 2 | "OrdersCount": 500000, 3 | "StartDT": "2014-01-01T00:00:00Z", 4 | "YearsCount": 3, 5 | "CutDateBefore": "2000-02-15", 6 | "CutDateAfter": "2050-11-20", 7 | "CustomerFakeGenerator": -100000, 8 | "CustomerPercentage": 0.15, 9 | "OutputFormat": "CSV", 10 | "SalesOrders": "BOTH", 11 | "DeltaTableOrdersPerFile": 1000000, 12 | "ParquetOrdersRowGroupSize": 250000, 13 | "-CsvMaxOrdersPerFile": 100000, 14 | "-CsvGzCompression": 1, 15 | 16 | 17 | "DaysWeight": { 18 | "DaysWeightConstant": false, 19 | "DaysWeightPoints": [ 0, 10, 30, 40, 70, 75, 80, 100 ], 20 | "DaysWeightValues": [ 2, 3, 5, 9, 11, 16, 14, 13 ], 21 | "DaysWeightAddSpikes": true, 22 | "WeekDaysFactor": [ 0.7, 0.8, 1, 1.3, 1.2, 1, 1.6 ], 23 | "DayRandomness": 0.05 24 | }, 25 | 26 | "OrderRowsWeights": [ 12, 9, 7, 4, 1, 1, 1 ], 27 | 
"OrderQuantityWeights": [ 20, 15, 12, 6, 4, 4, 4, 2, 1, 1 ], 28 | "DiscountWeights": [ 80, 5, 2, 2, 3, 4, 4, 8, 9, 10, 15, 20, 20, 15, 9 ], 29 | "OnlinePerCent": [ 0.12, 0.16, 0.20, 0.30, 0.6, 0.5 ], 30 | 31 | "DeliveryDateLambdaWeights": [ 10, 4, 3, 2, 2 ], 32 | 33 | "CountryCurrency": { 34 | "AU": "AUD", 35 | "CA": "CAD", 36 | "FR": "EUR", 37 | "DE": "EUR", 38 | "IT": "EUR", 39 | "NL": "EUR", 40 | "GB": "GBP", 41 | "US": "USD" 42 | }, 43 | 44 | "AnnualSpikes": [ 45 | { 46 | "StartDay": 10, 47 | "EndDay": 60, 48 | "Factor": 2 49 | }, 50 | { 51 | "StartDay": 100, 52 | "EndDay": 120, 53 | "Factor": 1 54 | }, 55 | { 56 | "StartDay": 200, 57 | "EndDay": 260, 58 | "Factor": 0.5 59 | }, 60 | { 61 | "StartDay": 345, 62 | "EndDay": 375, 63 | "Factor": 1.5 64 | } 65 | ], 66 | 67 | "OneTimeSpikes": [ 68 | { 69 | "DT1": "2014-02-25", 70 | "DT2": "2014-11-25", 71 | "Factor": 0.5 72 | }, 73 | { 74 | "DT1": "2018-01-01", 75 | "DT2": "2018-08-15", 76 | "Factor": 1.5 77 | }, 78 | { 79 | "DT1": "2020-09-28", 80 | "DT2": "2021-09-15", 81 | "Factor": 0.5 82 | } 83 | ], 84 | 85 | "CustomerActivity": { 86 | "StartDateWeightPoints": [ 1980, 1990, 2000, 2010 ], 87 | "StartDateWeightValues": [ 10, 10, 5, 5 ], 88 | "EndDateWeightPoints": [ 2005, 2015, 2025, 2035, 2045 ], 89 | "EndDateWeightValues": [ 5, 5, 10, 15, 25 ] 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /scripts/publish_tool/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "OrdersCount": 500000, 3 | "StartDT": "2014-01-01T00:00:00Z", 4 | "YearsCount": 3, 5 | "CutDateBefore": "2000-02-15", 6 | "CutDateAfter": "2050-11-20", 7 | "CustomerFakeGenerator": -100000, 8 | "CustomerPercentage": 0.15, 9 | "OutputFormat": "CSV", 10 | "SalesOrders": "BOTH", 11 | "DeltaTableOrdersPerFile": 1000000, 12 | "ParquetOrdersRowGroupSize": 250000, 13 | "-CsvMaxOrdersPerFile": 100000, 14 | "-CsvGzCompression": 1, 15 | 16 | 17 | "DaysWeight": { 
18 | "DaysWeightConstant": false, 19 | "DaysWeightPoints": [ 0, 10, 30, 40, 70, 75, 80, 100 ], 20 | "DaysWeightValues": [ 2, 3, 5, 9, 11, 16, 14, 13 ], 21 | "DaysWeightAddSpikes": true, 22 | "WeekDaysFactor": [ 0.7, 0.8, 1, 1.3, 1.2, 1, 1.6 ], 23 | "DayRandomness": 0.05 24 | }, 25 | 26 | "OrderRowsWeights": [ 12, 9, 7, 4, 1, 1, 1 ], 27 | "OrderQuantityWeights": [ 20, 15, 12, 6, 4, 4, 4, 2, 1, 1 ], 28 | "DiscountWeights": [ 80, 5, 2, 2, 3, 4, 4, 8, 9, 10, 15, 20, 20, 15, 9 ], 29 | "OnlinePerCent": [ 0.12, 0.16, 0.20, 0.30, 0.6, 0.5 ], 30 | 31 | "DeliveryDateLambdaWeights": [ 10, 4, 3, 2, 2 ], 32 | 33 | "CountryCurrency": { 34 | "AU": "AUD", 35 | "CA": "CAD", 36 | "FR": "EUR", 37 | "DE": "EUR", 38 | "IT": "EUR", 39 | "NL": "EUR", 40 | "GB": "GBP", 41 | "US": "USD" 42 | }, 43 | 44 | "AnnualSpikes": [ 45 | { 46 | "StartDay": 10, 47 | "EndDay": 60, 48 | "Factor": 2 49 | }, 50 | { 51 | "StartDay": 100, 52 | "EndDay": 120, 53 | "Factor": 1 54 | }, 55 | { 56 | "StartDay": 200, 57 | "EndDay": 260, 58 | "Factor": 0.5 59 | }, 60 | { 61 | "StartDay": 345, 62 | "EndDay": 375, 63 | "Factor": 1.5 64 | } 65 | ], 66 | 67 | "OneTimeSpikes": [ 68 | { 69 | "DT1": "2014-02-25", 70 | "DT2": "2014-11-25", 71 | "Factor": 0.5 72 | }, 73 | { 74 | "DT1": "2018-01-01", 75 | "DT2": "2018-08-15", 76 | "Factor": 1.5 77 | }, 78 | { 79 | "DT1": "2020-09-28", 80 | "DT2": "2021-09-15", 81 | "Factor": 0.5 82 | } 83 | ], 84 | 85 | "CustomerActivity": { 86 | "StartDateWeightPoints": [ 1980, 1990, 2000, 2010 ], 87 | "StartDateWeightValues": [ 10, 10, 5, 5 ], 88 | "EndDateWeightPoints": [ 2005, 2015, 2025, 2035, 2045 ], 89 | "EndDateWeightValues": [ 5, 5, 10, 15, 25 ] 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /scripts/build_data/build_single.cmd: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | ECHO --- Start 3 | 4 | IF "%1" == "" ( 5 | SET /P "id=Enter ID: " 6 | ) ELSE ( 7 | SET id=%1 8 | ) 9 | 10 
| ECHO OPTION:: %id% 11 | 12 | IF "%id%" == "csv-10k" CALL :do_build %id% CSV 10000 0.05 2015-01-01 10 2021-05-18 2024-04-20 "" 13 | IF "%id%" == "delta-10k" CALL :do_build %id% DELTATABLE 10000 0.05 2015-01-01 10 2021-05-18 2024-04-20 2000 14 | IF "%id%" == "parquet-10k" CALL :do_build %id% PARQUET 10000 0.05 2015-01-01 10 2021-05-18 2024-04-20 "" 15 | IF "%id%" == "csv-100k" CALL :do_build %id% CSV 100000 0.05 2015-01-01 10 2014-05-18 2024-04-20 "" 16 | IF "%id%" == "delta-100k" CALL :do_build %id% DELTATABLE 100000 0.05 2015-01-01 10 2014-05-18 2024-04-20 20000 17 | IF "%id%" == "parquet-100k" CALL :do_build %id% PARQUET 100000 0.05 2015-01-01 10 2014-05-18 2024-04-20 "" 18 | IF "%id%" == "csv-1m" CALL :do_build %id% CSV 1000000 0.05 2015-01-01 10 2014-05-18 2024-04-20 "" 19 | IF "%id%" == "delta-1m" CALL :do_build %id% DELTATABLE 1000000 0.05 2015-01-01 10 2014-05-18 2024-04-20 200000 20 | IF "%id%" == "parquet-1m" CALL :do_build %id% PARQUET 1000000 0.05 2015-01-01 10 2014-05-18 2024-04-20 "" 21 | IF "%id%" == "csv-10m" CALL :do_build %id% CSV 10000000 0.80 2015-01-01 10 2014-05-18 2024-04-20 "" 22 | IF "%id%" == "delta-10m" CALL :do_build %id% DELTATABLE 10000000 0.80 2015-01-01 10 2014-05-18 2024-04-20 2000000 23 | IF "%id%" == "parquet-10m" CALL :do_build %id% PARQUET 10000000 0.80 2015-01-01 10 2014-05-18 2024-04-20 "" 24 | IF "%id%" == "csv-100m" CALL :do_build %id% CSV 100000000 1.00 2015-01-01 10 2014-05-18 2024-04-20 "" 25 | IF "%id%" == "delta-100m" CALL :do_build %id% DELTATABLE 100000000 1.00 2015-01-01 10 2014-05-18 2024-04-20 20000000 26 | IF "%id%" == "parquet-100m" CALL :do_build %id% PARQUET 100000000 1.00 2015-01-01 10 2014-05-18 2024-04-20 "" 27 | 28 | ECHO --- End 29 | GOTO :eof 30 | 31 | 32 | :do_build 33 | ECHO DO BUILD : %1 %2 %3 %4 %5 %6 %7 %8 %9 34 | ..\..\DatabaseGenerator\bin\Release\net8.0\DatabaseGenerator.exe config.json data.xlsx out\%1 cache param:OutputFormat=%2 param:OrdersCount=%3 param:CustomerPercentage=%4 param:StartDT=%5 
using DatabaseGenerator.Models;
using Parquet.Serialization.Attributes;
using System.Collections.Generic;
using System.Text.Json;


namespace DatabaseGenerator.DataWriter.Parquet
{

    /// <summary>
    /// Flat representation of a single order line, shaped for the Parquet writer.
    /// Monetary columns are declared as decimal(20,5) both on the properties
    /// (via <see cref="ParquetDecimalAttribute"/>) and in the Delta schema text.
    /// </summary>
    public class ParquetOrderRow
    {

        // ---------------------------------------------------------
        public long OrderKey { get; set; }
        public int RowNumber { get; set; }
        public int ProductKey { get; set; }
        public int Quantity { get; set; }
        [ParquetDecimal(20, 5)]
        public decimal UnitPrice { get; set; }
        [ParquetDecimal(20, 5)]
        public decimal NetPrice { get; set; }
        [ParquetDecimal(20, 5)]
        public decimal UnitCost { get; set; }
        // ---------------------------------------------------------


        /// <summary>
        /// Builds the output row for one line of an order.
        /// </summary>
        /// <param name="order">Order the line belongs to; only its <c>OrderID</c> is read.</param>
        /// <param name="orderRow">Order line supplying row number, product, quantity and prices.</param>
        /// <returns>A new <see cref="ParquetOrderRow"/> populated from the two inputs.</returns>
        public static ParquetOrderRow FromOrderRow(Order order, OrderRow orderRow)
        {
            return new ParquetOrderRow()
            {
                OrderKey = order.OrderID,
                RowNumber = orderRow.RowNumber,
                ProductKey = orderRow.ProductID,
                Quantity = orderRow.Quantity,
                UnitPrice = orderRow.UnitPrice,
                NetPrice = orderRow.NetPrice,
                UnitCost = orderRow.UnitCost,
            };
        }


        /// <summary>
        /// Returns the JSON text of the Delta schema describing this row type.
        /// </summary>
        /// <returns>The serialized <c>DeltaSchema</c>.</returns>
        public static string GETDeltaSchema()
        {
            // NOTE(review): the third argument of DeltaField.GetInstance is presumably the
            // "nullable" flag - confirm against DeltaSchema.cs. Every column here is backed by a
            // non-nullable CLR value type, so all flags are false; this also matches how the
            // same price/cost columns are declared in ParquetSale.GETDeltaSchema.
            var schema = new DeltaSchema()
            {
                type = "struct",
                fields = new List<DeltaField>()
                {
                    DeltaField.GetInstance("OrderKey", "long", false),
                    DeltaField.GetInstance("RowNumber", "integer", false),
                    DeltaField.GetInstance("ProductKey", "integer", false),
                    DeltaField.GetInstance("Quantity", "integer", false),
                    DeltaField.GetInstance("UnitPrice", "decimal(20,5)", false),
                    DeltaField.GetInstance("NetPrice", "decimal(20,5)", false),
                    DeltaField.GetInstance("UnitCost", "decimal(20,5)", false),
                }
            };

            return JsonSerializer.Serialize(schema, DeltaSchemaSerializerContext.Default.DeltaSchema);
        }

    }

}
using System;


namespace DatabaseGenerator.Models
{

    /// <summary>
    /// A customer record as used by the generator. Property order is kept stable
    /// because writers may depend on it.
    /// </summary>
    public class Customer
    {
        // ----- Generator-side fields: not present in .csv.gz -----

        /// <summary>Customer identifier (the source "Number" column is mapped here).</summary>
        public int CustomerID { get; set; }

        /// <summary>Geographic area identifier.</summary>
        public int GeoAreaID { get; set; }

        /// <summary>Start date; not populated in customerall.csv.</summary>
        public DateTime? StartDT { get; set; }

        /// <summary>End date; not populated in customerall.csv.</summary>
        public DateTime? EndDT { get; set; }

        /// <summary>Continent name.</summary>
        public string Continent { get; set; }


        // ----- Personal data -----

        public string Gender { get; set; }
        public string Title { get; set; }
        public string GivenName { get; set; }
        public string MiddleInitial { get; set; }
        public string Surname { get; set; }


        // ----- Address -----

        public string StreetAddress { get; set; }
        public string City { get; set; }
        public string State { get; set; }
        public string StateFull { get; set; }
        public string ZipCode { get; set; }
        public string Country { get; set; }
        public string CountryFull { get; set; }


        // ----- Demographics -----

        public DateTime Birthday { get; set; }
        public int Age { get; set; }
        public string Occupation { get; set; }
        public string Company { get; set; }
        public string Vehicle { get; set; }


        // ----- Coordinates -----

        public double Latitude { get; set; }
        public double Longitude { get; set; }


        // Columns deliberately left unmapped (presumably columns of the raw customer
        // source - kept here for reference only):
        //   NameSet, EmailAddress, Username, Password, BrowserUserAgent, TelephoneNumber,
        //   TelephoneCountryCode, MothersMaiden, TropicalZodiac, CCType, CCNumber, CVV2,
        //   CCExpires, NationalID, UPS, WesternUnionMTCN, MoneyGramMTCN, Color, Domain,
        //   BloodType, Pounds, Kilograms, FeetInches, Centimeters, GUID
    }

}
using DatabaseGenerator.Models;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;


namespace DatabaseGenerator
{

    /// <summary>
    /// Helpers that fill denormalized descriptive fields on the input models in place,
    /// by looking up the related category / geographic-area records.
    /// </summary>
    public static class Converter2OUT
    {

        /// <summary>
        /// Sets <c>CategoryID</c>, <c>CategoryName</c> and <c>SubCategoryName</c> on every product.
        /// </summary>
        /// <param name="inProductList">Products to update in place.</param>
        /// <param name="inCategoryList">Categories, looked up by <c>CategoryID</c>.</param>
        /// <param name="inSubcategoryList">Sub-categories, looked up by <c>SubCategoryID</c>.</param>
        /// <exception cref="InvalidOperationException">
        /// A lookup finds no match or more than one match (behaviour of <c>Single</c>).
        /// </exception>
        public static void EnrichProducts(List<Product> inProductList, List<Category> inCategoryList, List<SubCategory> inSubcategoryList)
        {
            foreach (var product in inProductList)
            {
                // Resolve product -> sub-category -> category; each step must match exactly once.
                var subCategory = inSubcategoryList.Single(sc => sc.SubCategoryID == product.SubCategoryID);
                var category = inCategoryList.Single(c => c.CategoryID == subCategory.CategoryID);

                product.CategoryID = category.CategoryID;
                product.CategoryName = category.CategoryName;
                product.SubCategoryName = subCategory.SubCategoryName;
            }
        }


        /// <summary>
        /// Sets <c>Country</c> and <c>State</c> on every store from its geographic area.
        /// </summary>
        /// <param name="storesIN">Stores to update in place.</param>
        /// <param name="geoAreas">Geographic areas, looked up by <c>GeoAreaID</c>.</param>
        /// <exception cref="InvalidOperationException">
        /// A store's area is missing or duplicated (behaviour of <c>Single</c>).
        /// </exception>
        public static void EnrichStores(List<Store> storesIN, List<GeoArea> geoAreas)
        {
            foreach (var store in storesIN)
            {
                var area = geoAreas.Single(g => g.GeoAreaID == store.GeoAreaID);

                store.Country = area.Country;
                store.State = area.StateLongName;
            }
        }

    }

}
PRINT 'SQL > Creating [Data].[Sales]'

------------------------------------------------------------------------------------------
--
-- Table: one row per order line, keyed by (OrderKey, LineNumber)
--
------------------------------------------------------------------------------------------

CREATE TABLE [Data].[Sales]
(
    [OrderKey]     [bigint]      NOT NULL,
    [LineNumber]   [int]         NOT NULL,
    [OrderDate]    [date]        NOT NULL,
    [DeliveryDate] [date]        NOT NULL,
    [CustomerKey]  [int]         NOT NULL,
    [StoreKey]     [int]         NOT NULL,
    [ProductKey]   [int]         NOT NULL,
    [Quantity]     [int]         NOT NULL,
    [UnitPrice]    [money]       NOT NULL,
    [NetPrice]     [money]       NOT NULL,
    [UnitCost]     [money]       NOT NULL,
    [CurrencyCode] [nvarchar](5) NOT NULL,
    [ExchangeRate] [float]       NOT NULL,
    CONSTRAINT [PK_Sales] PRIMARY KEY CLUSTERED ( [OrderKey] ASC, [LineNumber] ASC )
        WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF)
        ON [PRIMARY]
) ON [PRIMARY]


-- Foreign keys: each one is added WITH CHECK and then explicitly enabled.

ALTER TABLE [Data].[Sales] WITH CHECK
    ADD CONSTRAINT [FK_Sales_Customers] FOREIGN KEY([CustomerKey]) REFERENCES [Data].[Customer] ([CustomerKey])
ALTER TABLE [Data].[Sales] CHECK CONSTRAINT [FK_Sales_Customers]

ALTER TABLE [Data].[Sales] WITH CHECK
    ADD CONSTRAINT [FK_Sales_Products] FOREIGN KEY([ProductKey]) REFERENCES [Data].[Product] ([ProductKey])
ALTER TABLE [Data].[Sales] CHECK CONSTRAINT [FK_Sales_Products]

ALTER TABLE [Data].[Sales] WITH CHECK
    ADD CONSTRAINT [FK_Sales_Stores] FOREIGN KEY([StoreKey]) REFERENCES [Data].[Store] ([StoreKey])
ALTER TABLE [Data].[Sales] CHECK CONSTRAINT [FK_Sales_Stores]


-- Nonclustered indexes on the foreign-key columns.

CREATE NONCLUSTERED INDEX [IX_Sales_CustomerKey] ON [Data].[Sales] ( [CustomerKey] ASC )
    WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF)
    ON [PRIMARY]

CREATE NONCLUSTERED INDEX [IX_Sales_ProductKey] ON [Data].[Sales] ( [ProductKey] ASC )
    WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF)
    ON [PRIMARY]

CREATE NONCLUSTERED INDEX [IX_Sales_StoreKey] ON [Data].[Sales] ( [StoreKey] ASC )
    WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF)
    ON [PRIMARY]

GO


------------------------------------------------------------------------------------------
--
-- Views
--
------------------------------------------------------------------------------------------

-- Exposes [Data].[Sales] under dbo with space-separated column names.
CREATE OR ALTER VIEW dbo.Sales AS
SELECT
    OrderKey       AS [Order Number],
    [LineNumber]   AS [Line Number],
    [OrderDate]    AS [Order Date],
    [DeliveryDate] AS [Delivery Date],
    CustomerKey,
    StoreKey,
    ProductKey,
    Quantity,
    UnitPrice      AS [Unit Price],
    NetPrice       AS [Net Price],
    UnitCost       AS [Unit Cost],
    CurrencyCode   AS [Currency Code],
    ExchangeRate   AS [Exchange Rate]
FROM
    [Data].Sales

GO
[UnitCost] [money] NOT NULL 22 | CONSTRAINT [PK_OrderRows] PRIMARY KEY CLUSTERED ( [OrderKey] ASC, [LineNumber] ASC ) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY] 23 | ) ON [PRIMARY] 24 | 25 | 26 | ALTER TABLE [Data].[OrderRows] WITH CHECK ADD CONSTRAINT [FK_OrderRows_Orders] FOREIGN KEY([OrderKey]) REFERENCES [Data].[Orders] ([OrderKey]) 27 | ALTER TABLE [Data].[OrderRows] WITH CHECK ADD CONSTRAINT [FK_OrderRows_Products] FOREIGN KEY([ProductKey]) REFERENCES [Data].[Product] ([ProductKey]) 28 | ALTER TABLE [Data].[OrderRows] CHECK CONSTRAINT [FK_OrderRows_Orders] 29 | ALTER TABLE [Data].[OrderRows] CHECK CONSTRAINT [FK_OrderRows_Products] 30 | 31 | ALTER TABLE [Data].[Orders] WITH CHECK ADD CONSTRAINT [FK_Orders_Customers] FOREIGN KEY([CustomerKey]) REFERENCES [Data].[Customer] ([CustomerKey]) 32 | ALTER TABLE [Data].[Orders] WITH CHECK ADD CONSTRAINT [FK_Orders_Stores] FOREIGN KEY([StoreKey]) REFERENCES [Data].[Store] ([StoreKey]) 33 | ALTER TABLE [Data].[Orders] CHECK CONSTRAINT [FK_Orders_Customers] 34 | ALTER TABLE [Data].[Orders] CHECK CONSTRAINT [FK_Orders_Stores] 35 | 36 | CREATE NONCLUSTERED INDEX [IX_OrderRows_OrderKey] ON [Data].[OrderRows] ( [OrderKey] ASC ) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY] 37 | CREATE NONCLUSTERED INDEX [IX_OrderRows_ProductKey] ON [Data].[OrderRows] ( [ProductKey] ASC ) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY] 38 | CREATE NONCLUSTERED INDEX [IX_Orders_CustomerKey] ON [Data].[Orders] ( [CustomerKey] ASC ) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, 
using System;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Text.Json;
using System.Threading.Tasks;


namespace DatabaseGenerator
{

    /// <summary>
    /// Command-line entry point: reads the JSON configuration, applies optional
    /// <c>param:Name=Value</c> overrides from the command line and runs the engine.
    /// </summary>
    public class Program
    {

        // Positional arguments: configfile, datafile, outputfolder, cachefolder.
        private const int RequiredArgsCount = 4;


        /// <summary>
        /// Prints the banner, validates the arguments, loads the configuration and runs the engine.
        /// </summary>
        /// <param name="args">
        /// Four positional arguments (config file, data file, output folder, cache folder)
        /// optionally followed by <c>param:Name=Value</c> overrides.
        /// </param>
        static async Task Main(string[] args)
        {
            var assemblyName = Assembly.GetExecutingAssembly().GetName();
            Console.WriteLine($"{assemblyName.Name} - {assemblyName.Version}");
            Console.WriteLine();
            Console.WriteLine($"https://github.com/sql-bi/Contoso-Data-Generator-V2");
            Console.WriteLine();

            // All four positional arguments are read below, so anything shorter gets the
            // usage text instead of an IndexOutOfRangeException.
            if (args.Length < RequiredArgsCount)
            {
                Console.WriteLine("USAGE: databasegenerator.exe configfile datafile outputfolder cachefolder [param:OrdersCount=nnnnnnn] ");
                return;
            }

            string configFile = args[0];
            string datafile = args[1];
            string outputFolder = args[2];
            string cacheFolder = args[3];

            // read config
            var config = JsonSerializer.Deserialize(File.ReadAllText(configFile), ConfigSerializerContext.Default.Config);
            InjectConfigValuesFromCommandLine(config, args);
            config.SalesOrdersOut = new SOOutput(config.SalesOrders);

            // run engine
            await new Engine(datafile, outputFolder, cacheFolder, config).Exec();
        }


        /// <summary>
        /// Overrides configuration values with any <c>param:Name=Value</c> arguments.
        /// A parameter that is absent or has an empty value leaves the configured value untouched.
        /// </summary>
        /// <param name="cfg">Configuration to update in place.</param>
        /// <param name="args">Full command-line argument array.</param>
        /// <exception cref="FormatException">An override value cannot be parsed.</exception>
        private static void InjectConfigValuesFromCommandLine(Config cfg, string[] args)
        {
            cfg.OrdersCount = ParseInt(GetParamValue(args, "OrdersCount"), cfg.OrdersCount);
            cfg.CutDateBefore = ParseDate(GetParamValue(args, "CutDateBefore"), cfg.CutDateBefore);
            cfg.CutDateAfter = ParseDate(GetParamValue(args, "CutDateAfter"), cfg.CutDateAfter);
            cfg.CustomerPercentage = ParseDouble(GetParamValue(args, "CustomerPercentage"), cfg.CustomerPercentage);
            cfg.OutputFormat = ParseString(GetParamValue(args, "OutputFormat"), cfg.OutputFormat);
            cfg.DeltaTableOrdersPerFile = ParseInt(GetParamValue(args, "DeltaTableOrdersPerFile"), cfg.DeltaTableOrdersPerFile);
            cfg.ParquetOrdersRowGroupSize = ParseInt(GetParamValue(args, "ParquetOrdersRowGroupSize"), cfg.ParquetOrdersRowGroupSize);
            cfg.StartDT = ParseDate(GetParamValue(args, "StartDT"), cfg.StartDT).Value;
            cfg.YearsCount = ParseInt(GetParamValue(args, "YearsCount"), cfg.YearsCount);
        }


        // Returns the text after "param:<paramName>=" of the first matching argument, or null
        // when there is none. The whole remainder is returned, so values that themselves
        // contain '=' are not truncated.
        private static string GetParamValue(string[] args, string paramName)
        {
            string prefix = $"param:{paramName}=";

            return args.Where(a => a.StartsWith(prefix, StringComparison.Ordinal))
                       .Select(a => a.Substring(prefix.Length))
                       .FirstOrDefault();
        }


        // Parses an integer override (invariant culture); null/empty keeps the default.
        private static int ParseInt(string s, int defValue)
        {
            return string.IsNullOrEmpty(s) ? defValue : int.Parse(s, CultureInfo.InvariantCulture);
        }


        // Parses a floating-point override (invariant culture); null/empty keeps the default.
        private static double ParseDouble(string s, double defValue)
        {
            return string.IsNullOrEmpty(s) ? defValue : double.Parse(s, CultureInfo.InvariantCulture);
        }


        // Parses a yyyy-MM-dd date override; null/empty keeps the default.
        private static DateTime? ParseDate(string s, DateTime? defValue)
        {
            return string.IsNullOrEmpty(s) ? defValue : DateTime.ParseExact(s, "yyyy-MM-dd", CultureInfo.InvariantCulture);
        }


        // Returns the override text, or the default when the override is null/empty.
        private static string ParseString(string s, string defValue)
        {
            return string.IsNullOrEmpty(s) ? defValue : s;
        }

    }

}
/// <summary>
/// Row shape used for the product table when it is written as Parquet,
/// together with the matching Delta schema description.
/// </summary>
public class ParquetProduct
{

    // ---------------------------------------------------------
    // Columns. Decimal columns are declared with precision 20, scale 5.
    public int ProductKey { get; set; }
    public string ProductCode { get; set; }
    public string ProductName { get; set; }
    public string Manufacturer { get; set; }
    public string Brand { get; set; }
    public string Color { get; set; }
    public string WeightUnit { get; set; }
    [ParquetDecimal(20, 5)]
    public decimal? Weight { get; set; }
    [ParquetDecimal(20, 5)]
    public decimal Cost { get; set; }
    [ParquetDecimal(20, 5)]
    public decimal Price { get; set; }
    public int CategoryKey { get; set; }
    public string CategoryName { get; set; }
    public int SubCategoryKey { get; set; }
    public string SubCategoryName { get; set; }
    // ---------------------------------------------------------


    /// <summary>
    /// Builds the Parquet row for a <see cref="Product"/>.
    /// The *ID members of the model become the *Key columns; Cost and Price are cast to decimal.
    /// </summary>
    /// <param name="p">Source product.</param>
    /// <returns>A new row carrying the product's values.</returns>
    public static ParquetProduct FromProduct(Product p)
    {
        var row = new ParquetProduct();

        row.ProductKey = p.ProductID;
        row.ProductCode = p.ProductCode;
        row.ProductName = p.ProductName;
        row.Manufacturer = p.Manufacturer;
        row.Brand = p.Brand;
        row.Color = p.Color;
        row.WeightUnit = p.WeightUnit;
        row.Weight = p.Weight;
        row.Cost = (decimal)p.Cost;
        row.Price = (decimal)p.Price;
        row.CategoryKey = p.CategoryID;
        row.CategoryName = p.CategoryName;
        row.SubCategoryKey = p.SubCategoryID;
        row.SubCategoryName = p.SubCategoryName;

        return row;
    }


    /// <summary>
    /// Serializes the Delta schema ("struct" with one field per column) to JSON.
    /// </summary>
    /// <returns>The schema as a JSON string.</returns>
    public static string GETDeltaSchema()
    {
        // Field order mirrors the property order above.
        var columns = new[]
        {
            DeltaField.GetInstance("ProductKey", "integer", false),
            DeltaField.GetInstance("ProductCode", "string", true),
            DeltaField.GetInstance("ProductName", "string", true),
            DeltaField.GetInstance("Manufacturer", "string", true),
            DeltaField.GetInstance("Brand", "string", true),
            DeltaField.GetInstance("Color", "string", true),
            DeltaField.GetInstance("WeightUnit", "string", true),
            DeltaField.GetInstance("Weight", "decimal(20,5)", true),
            DeltaField.GetInstance("Cost", "decimal(20,5)", false),
            DeltaField.GetInstance("Price", "decimal(20,5)", false),
            DeltaField.GetInstance("CategoryKey", "integer", false),
            DeltaField.GetInstance("CategoryName", "string", true),
            DeltaField.GetInstance("SubCategoryKey", "integer", false),
            DeltaField.GetInstance("SubCategoryName", "string", true),
        };

        var schema = new DeltaSchema() { type = "struct", fields = columns.ToList() };

        return JsonSerializer.Serialize(schema, DeltaSchemaSerializerContext.Default.DeltaSchema);
    }

}
[ParquetDecimal(20, 5)]
public decimal NetPrice { get; set; }
[ParquetDecimal(20, 5)]
public decimal UnitCost { get; set; }
public string CurrencyCode { get; set; }
[ParquetDecimal(20, 5)]
public decimal ExchangeRate { get; set; }
// -------------------------------------------------------------------------


/// <summary>
/// Builds the Parquet row for a <see cref="Sale"/>; every column is copied one-to-one.
/// </summary>
/// <param name="sale">Source sale line.</param>
/// <returns>A new row carrying the sale's values.</returns>
public static ParquetSale FromSale(Sale sale)
{
    var row = new ParquetSale();

    row.OrderKey = sale.OrderKey;
    row.LineNumber = sale.LineNumber;
    row.OrderDate = sale.OrderDate;
    row.DeliveryDate = sale.DeliveryDate;
    row.CustomerKey = sale.CustomerKey;
    row.StoreKey = sale.StoreKey;
    row.ProductKey = sale.ProductKey;
    row.Quantity = sale.Quantity;
    row.UnitPrice = sale.UnitPrice;
    row.NetPrice = sale.NetPrice;
    row.UnitCost = sale.UnitCost;
    row.CurrencyCode = sale.CurrencyCode;
    row.ExchangeRate = sale.ExchangeRate;

    return row;
}


/// <summary>
/// Serializes the Delta schema ("struct" with one field per column) to JSON.
/// </summary>
/// <returns>The schema as a JSON string.</returns>
public static string GETDeltaSchema()
{
    var columns = new[]
    {
        DeltaField.GetInstance("OrderKey", "long", false),
        DeltaField.GetInstance("LineNumber", "integer", false),
        DeltaField.GetInstance("OrderDate", "timestamp", false),
        DeltaField.GetInstance("DeliveryDate", "timestamp", false),
        DeltaField.GetInstance("CustomerKey", "integer", false),
        DeltaField.GetInstance("StoreKey", "integer", false),
        DeltaField.GetInstance("ProductKey", "integer", false),
        DeltaField.GetInstance("Quantity", "integer", false),
        DeltaField.GetInstance("UnitPrice", "decimal(20,5)", false),
        DeltaField.GetInstance("NetPrice", "decimal(20,5)", false),
        DeltaField.GetInstance("UnitCost", "decimal(20,5)", false),
        DeltaField.GetInstance("CurrencyCode", "string", true),
        DeltaField.GetInstance("ExchangeRate", "decimal(20,5)", false),
    };

    var schema = new DeltaSchema() { type = "struct", fields = columns.ToList() };

    return JsonSerializer.Serialize(schema, DeltaSchemaSerializerContext.Default.DeltaSchema);
}
84 | } 85 | 86 | } 87 | -------------------------------------------------------------------------------- /DatabaseGenerator/Config.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Text; 4 | using System.Text.Json.Serialization; 5 | 6 | 7 | namespace DatabaseGenerator 8 | { 9 | 10 | // ----------------- 11 | // Required for serialization because JsonSerializerIsReflectionEnabledByDefault = false 12 | [JsonSerializable(typeof(Config))] 13 | internal partial class ConfigSerializerContext : JsonSerializerContext 14 | { 15 | } 16 | // ----------------- 17 | 18 | 19 | public class Config 20 | { 21 | 22 | // Class used for reading configuration file by json deserialization. Modify carefully. 23 | 24 | public int OrdersCount { get; set; } 25 | public DateTime StartDT { get; set; } 26 | public int YearsCount { get; set; } 27 | public DateTime? CutDateBefore { get; set; } 28 | public DateTime? CutDateAfter { get; set; } 29 | public double CustomerPercentage { get; set; } 30 | public int CustomerFakeGenerator { get; set; } // >0 : use fake customers 31 | 32 | public string OutputFormat { get; set; } // CSV PARQUET DELTATABLE 33 | public string SalesOrders { get; set; } // SALES ORDERS BOTH 34 | 35 | public int DeltaTableOrdersPerFile { get; set; } 36 | public int ParquetOrdersRowGroupSize { get; set; } 37 | 38 | public int? CsvMaxOrdersPerFile { get; set; } 39 | public int? 
/// <summary>
/// "DaysWeight" section of the configuration (type of <c>Config.DaysWeight</c>).
/// </summary>
public class DaysWeight
{
    public bool DaysWeightConstant { get; set; }
    public double[] DaysWeightPoints { get; set; }
    public double[] DaysWeightValues { get; set; }
    public bool DaysWeightAddSpikes { get; set; }
    public double[] WeekDaysFactor { get; set; }
    public double DayRandomness { get; set; }
}


/// <summary>
/// One entry of the configuration's one-time spikes: a date pair and a factor.
/// </summary>
public class OneTimeSpike
{
    public DateTime DT1 { get; set; }
    public DateTime DT2 { get; set; }
    public double Factor { get; set; }
}


/// <summary>
/// One entry of the configuration's annual spikes: a start/end day pair and a factor.
/// </summary>
public class AnnualSpike
{
    public int StartDay { get; set; }
    public int EndDay { get; set; }
    public double Factor { get; set; }
}


/// <summary>
/// "CustomerActivity" section of the configuration: weight points/values
/// for customer start and end dates.
/// </summary>
public class CustomerActivity
{
    public double[] StartDateWeightPoints { get; set; }
    public double[] StartDateWeightValues { get; set; }
    public double[] EndDateWeightPoints { get; set; }
    public double[] EndDateWeightValues { get; set; }
}


/// <summary>
/// Parsed form of the [SalesOrders] option: tells whether orders, sales or both must be written.
/// </summary>
public class SOOutput
{
    private readonly bool _orders;
    private readonly bool _sales;

    /// <summary>
    /// Parses the option value. Accepted values (case-insensitive): ORDERS, SALES, BOTH.
    /// </summary>
    /// <param name="outputType">Raw option text.</param>
    /// <exception cref="ArgumentNullException"><paramref name="outputType"/> is null.</exception>
    /// <exception cref="ArgumentException">The value is not one of the accepted options.</exception>
    public SOOutput(string outputType)
    {
        // A missing option used to surface as a bare NullReferenceException.
        if (outputType == null)
            throw new ArgumentNullException(nameof(outputType), "Missing value for [SalesOrders]");

        // Invariant casing: the option is a machine-readable keyword, not user-language text.
        outputType = outputType.ToUpperInvariant();

        _orders = outputType == "ORDERS" || outputType == "BOTH";
        _sales = outputType == "SALES" || outputType == "BOTH";

        // ArgumentException derives from Exception, so existing catch blocks keep working;
        // the message text is unchanged.
        if (!_orders && !_sales)
            throw new ArgumentException($"Unknown option for [SalesOrders] - '{outputType}'");
    }

    /// <summary>True when the option was ORDERS or BOTH.</summary>
    public bool WriteOrders => _orders;

    /// <summary>True when the option was SALES or BOTH.</summary>
    public bool WriteSales => _sales;
}
/// <summary>
/// Row shape used for the date table when it is written as Parquet,
/// together with the matching Delta schema description.
/// </summary>
public class ParquetDateExtended
{

    // ----------------------------------------------------------------
    [ParquetTimestamp(ParquetTimestampResolution.Milliseconds)]
    public DateTime Date { get; set; }
    public string DateKey { get; set; }
    public int Year { get; set; }
    public string YearQuarter { get; set; }
    public int YearQuarterNumber { get; set; }
    public string Quarter { get; set; }
    public string YearMonth { get; set; }
    public string YearMonthShort { get; set; }
    public int YearMonthNumber { get; set; }
    public string Month { get; set; }
    public string MonthShort { get; set; }
    public int MonthNumber { get; set; }
    public string DayofWeek { get; set; }
    public string DayofWeekShort { get; set; }
    public int DayofWeekNumber { get; set; }
    public int WorkingDay { get; set; }
    public int WorkingDayNumber { get; set; }
    // ----------------------------------------------------------------


    /// <summary>
    /// Builds the Parquet row for a <see cref="DateExtended"/>; every column is copied one-to-one.
    /// </summary>
    /// <param name="de">Source date record.</param>
    /// <returns>A new row carrying the date's values.</returns>
    public static ParquetDateExtended FromDateExtended(DateExtended de)
    {
        return new ParquetDateExtended()
        {
            Date = de.Date,
            DateKey = de.DateKey,
            Year = de.Year,
            YearQuarter = de.YearQuarter,
            YearQuarterNumber = de.YearQuarterNumber,
            Quarter = de.Quarter,
            YearMonth = de.YearMonth,
            YearMonthShort = de.YearMonthShort,
            YearMonthNumber = de.YearMonthNumber,
            Month = de.Month,
            MonthShort = de.MonthShort,
            MonthNumber = de.MonthNumber,
            DayofWeek = de.DayofWeek,
            DayofWeekShort = de.DayofWeekShort,
            DayofWeekNumber = de.DayofWeekNumber,
            WorkingDay = de.WorkingDay,
            WorkingDayNumber = de.WorkingDayNumber,
        };
    }


    /// <summary>
    /// Serializes the Delta schema ("struct" with one field per column) to JSON.
    /// </summary>
    /// <returns>The schema as a JSON string.</returns>
    public static string GETDeltaSchema()
    {
        var schema = new DeltaSchema()
        {
            type = "struct",
            fields = new[]
            {
                DeltaField.GetInstance("Date", "timestamp", false),
                DeltaField.GetInstance("DateKey", "string", true),
                DeltaField.GetInstance("Year", "integer", false),
                DeltaField.GetInstance("YearQuarter", "string", true),
                DeltaField.GetInstance("YearQuarterNumber", "integer", false),
                DeltaField.GetInstance("Quarter", "string", true),
                DeltaField.GetInstance("YearMonth", "string", true),
                DeltaField.GetInstance("YearMonthShort", "string", true),
                DeltaField.GetInstance("YearMonthNumber", "integer", false),
                DeltaField.GetInstance("Month", "string", true),
                DeltaField.GetInstance("MonthShort", "string", true),
                DeltaField.GetInstance("MonthNumber", "integer", false),
                // Was declared non-nullable, unlike every other string column of this table.
                // NOTE(review): confirm no consumer depends on the old non-nullable flag.
                DeltaField.GetInstance("DayofWeek", "string", true),
                DeltaField.GetInstance("DayofWeekShort", "string", true),
                DeltaField.GetInstance("DayofWeekNumber", "integer", false),
                DeltaField.GetInstance("WorkingDay", "integer", false),
                DeltaField.GetInstance("WorkingDayNumber", "integer", false),
            }.ToList()
        };

        return JsonSerializer.Serialize(schema, DeltaSchemaSerializerContext.Default.DeltaSchema);
    }
}
/// <summary>
/// Reads the ECB historical euro reference rates CSV and expands it into
/// day-by-day cross rates between a fixed set of currencies.
/// </summary>
internal class ECBExchangesReader
{

    /// <summary>
    /// Loads the exchange rates from the ECB CSV file.
    /// </summary>
    /// <param name="fileNameFP">Full path of the ECB CSV file.</param>
    /// <param name="addFutureFakeData">When true, the data set is mirrored into the future (see <see cref="AddFutureData"/>).</param>
    /// <returns>One item per (date, from-currency, to-currency) combination.</returns>
    public static List<CurrencyExchange> GetData(string fileNameFP, bool addFutureFakeData)
    {
        // https://www.ecb.europa.eu/stats/policy_and_exchange_rates/euro_reference_exchange_rates/html/index.en.html
        // https://www.ecb.europa.eu/stats/eurofxref/eurofxref-hist.zip

        // Example:
        // Date,USD,JPY,BGN,CYP,CZK,DKK,EEK,GBP,...
        // 2024-04-26,1.0714,168.03,1.9558,N/A,25.164,7.4573,N/A,0.85643,...
        // 2024-04-25,1.072,166.76,1.9558,N/A,25.152,7.4587,N/A,0.85675,...

        // Important: the source file:
        //  - contains exchange rate from Euro to other currencies
        //  - is ordered by descending date
        //  - only contains data for effective days of currencies exchange. So, there are holes to be filled.

        var outList = new List<CurrencyExchange>();

        string[] currencyCodes = { "AUD", "CAD", "EUR", "GBP", "USD" };

        // Both readers are disposed even if parsing throws (they were previously left open).
        using (var reader = new StreamReader(fileNameFP))
        using (var csv = new CsvReader(reader, CultureInfo.InvariantCulture))
        {
            csv.Read();
            csv.ReadHeader();

            DateTime previousDT = DateTime.MinValue;

            while (csv.Read())
            {
                var dt = DateTime.ParseExact(csv.GetField("Date"), "yyyy-MM-dd", CultureInfo.InvariantCulture);

                // First data row: pretend the previous (newer) row was the day after, so exactly one day is emitted.
                if (previousDT == DateTime.MinValue) { previousDT = dt.AddDays(1); }

                // The file is date-descending: this row's rates are used for its own date and
                // for every following day up to (but excluding) the previously read date.
                double daysToFill = previousDT.Subtract(dt).TotalDays;

                if (daysToFill > 0)
                {
                    // Parse the row's euro-based rates once instead of once per day and currency pair.
                    var euroRates = new decimal[currencyCodes.Length];
                    for (int c = 0; c < currencyCodes.Length; c++)
                    {
                        euroRates[c] = (currencyCodes[c] == "EUR") ? 1.0M : (decimal)csv.GetField<double>(currencyCodes[c]);
                    }

                    for (int i = 0; i < daysToFill; i++)
                    {
                        var exchangeDT = previousDT.AddDays(-i - 1);

                        for (int from = 0; from < currencyCodes.Length; from++)
                        {
                            for (int to = 0; to < currencyCodes.Length; to++)
                            {
                                // Cross rate through the euro, rounded to 5 decimals.
                                decimal exchangeRate = decimal.Round(1 / euroRates[from] * euroRates[to], 5);
                                outList.Add(new CurrencyExchange()
                                {
                                    Date = exchangeDT,
                                    FromCurrency = currencyCodes[from],
                                    ToCurrency = currencyCodes[to],
                                    Exchange = exchangeRate
                                });
                            }
                        }
                    }
                }

                previousDT = dt;
            }
        }

        if (addFutureFakeData)
        {
            AddFutureData(outList);
        }

        return outList;
    }


    /// <summary>
    /// Extends the data set into the future by mirroring it around its most recent date:
    /// an item N days before the last date is copied to N+1 days after it.
    /// The list is then re-sorted by date, from-currency and to-currency.
    /// </summary>
    /// <param name="dataSet">List to extend in place; left untouched when empty.</param>
    private static void AddFutureData(List<CurrencyExchange> dataSet)
    {
        // Max() throws on an empty sequence (e.g. a file with only the header row).
        if (dataSet.Count == 0)
        {
            return;
        }

        var refDT = dataSet.Max(x => x.Date);

        var fakeData = dataSet.Select(item => new CurrencyExchange()
        {
            Date = refDT.AddDays(refDT.Subtract(item.Date).TotalDays + 1),
            FromCurrency = item.FromCurrency,
            ToCurrency = item.ToCurrency,
            Exchange = item.Exchange
        });

        // Materialize before clearing: the query above still reads from dataSet.
        var merged = dataSet.Concat(fakeData)
                            .OrderBy(x => x.Date)
                            .ThenBy(x => x.FromCurrency)
                            .ThenBy(x => x.ToCurrency)
                            .ToList();

        dataSet.Clear();
        dataSet.AddRange(merged);
    }

}
/// <summary>
/// Row shape used for the customer table when it is written as Parquet,
/// together with the matching Delta schema description.
/// </summary>
public class ParquetCustomer
{

    // ----------------------------------------------------------------
    public int CustomerKey { get; set; }
    public int GeoAreaKey { get; set; }
    [ParquetTimestamp(ParquetTimestampResolution.Milliseconds)]
    public DateTime? StartDT { get; set; }
    [ParquetTimestamp(ParquetTimestampResolution.Milliseconds)]
    public DateTime? EndDT { get; set; }
    public string Continent { get; set; }
    public string Gender { get; set; }
    public string Title { get; set; }
    public string GivenName { get; set; }
    public string MiddleInitial { get; set; }
    public string Surname { get; set; }
    public string StreetAddress { get; set; }
    public string City { get; set; }
    public string State { get; set; }
    public string StateFull { get; set; }
    public string ZipCode { get; set; }
    public string Country { get; set; }
    public string CountryFull { get; set; }
    [ParquetTimestamp(ParquetTimestampResolution.Milliseconds)]
    public DateTime Birthday { get; set; }
    public int Age { get; set; }
    public string Occupation { get; set; }
    public string Company { get; set; }
    public string Vehicle { get; set; }
    [ParquetDecimal(20, 5)]
    public decimal Latitude { get; set; }
    [ParquetDecimal(20, 5)]
    public decimal Longitude { get; set; }
    // ----------------------------------------------------------------


    /// <summary>
    /// Builds the Parquet row for a customer. CustomerID/GeoAreaID become the *Key columns;
    /// Latitude and Longitude are cast to decimal.
    /// </summary>
    /// <param name="c">Source customer.</param>
    /// <returns>A new row carrying the customer's values.</returns>
    public static ParquetCustomer FromCustomer(DatabaseGenerator.Models.Customer c)
    {
        var row = new ParquetCustomer();

        // keys and activity window
        row.CustomerKey = c.CustomerID;
        row.GeoAreaKey = c.GeoAreaID;
        row.StartDT = c.StartDT;
        row.EndDT = c.EndDT;

        // personal data
        row.Continent = c.Continent;
        row.Gender = c.Gender;
        row.Title = c.Title;
        row.GivenName = c.GivenName;
        row.MiddleInitial = c.MiddleInitial;
        row.Surname = c.Surname;

        // address
        row.StreetAddress = c.StreetAddress;
        row.City = c.City;
        row.State = c.State;
        row.StateFull = c.StateFull;
        row.ZipCode = c.ZipCode;
        row.Country = c.Country;
        row.CountryFull = c.CountryFull;

        // other attributes
        row.Birthday = c.Birthday;
        row.Age = c.Age;
        row.Occupation = c.Occupation;
        row.Company = c.Company;
        row.Vehicle = c.Vehicle;
        row.Latitude = (decimal)c.Latitude;
        row.Longitude = (decimal)c.Longitude;

        return row;
    }

    /// <summary>
    /// Serializes the Delta schema ("struct" with one field per column) to JSON.
    /// </summary>
    /// <returns>The schema as a JSON string.</returns>
    public static string GETDeltaSchema()
    {
        var columns = new[]
        {
            DeltaField.GetInstance("CustomerKey", "integer", false),
            DeltaField.GetInstance("GeoAreaKey", "integer", false),
            DeltaField.GetInstance("StartDT", "timestamp", true),
            DeltaField.GetInstance("EndDT", "timestamp", true),
            DeltaField.GetInstance("Continent", "string", true),
            DeltaField.GetInstance("Gender", "string", true),
            DeltaField.GetInstance("Title", "string", true),
            DeltaField.GetInstance("GivenName", "string", true),
            DeltaField.GetInstance("MiddleInitial", "string", true),
            DeltaField.GetInstance("Surname", "string", true),
            DeltaField.GetInstance("StreetAddress", "string", true),
            DeltaField.GetInstance("City", "string", true),
            DeltaField.GetInstance("State", "string", true),
            DeltaField.GetInstance("StateFull", "string", true),
            DeltaField.GetInstance("ZipCode", "string", true),
            DeltaField.GetInstance("Country", "string", true),
            DeltaField.GetInstance("CountryFull", "string", true),
            DeltaField.GetInstance("Birthday", "timestamp", false),
            DeltaField.GetInstance("Age", "integer", false),
            DeltaField.GetInstance("Occupation", "string", true),
            DeltaField.GetInstance("Company", "string", true),
            DeltaField.GetInstance("Vehicle", "string", true),
            DeltaField.GetInstance("Latitude", "decimal(20,5)", false),
            DeltaField.GetInstance("Longitude", "decimal(20,5)", false),
        };

        var schema = new DeltaSchema() { type = "struct", fields = columns.ToList() };

        return JsonSerializer.Serialize(schema, DeltaSchemaSerializerContext.Default.DeltaSchema);
    }

}
/// <summary>
/// Loads the raw customer list from gzip-compressed CSV files and completes each
/// customer with id, continent, UK region and geo area.
/// </summary>
public class CustomerRAW2Csv
{

    /// <summary>Row of the "UKPostcodes.csv" lookup file.</summary>
    public class UKPostCode
    {
        public string Postcode { get; set; }
        public string Region { get; set; }
    }


    /// <summary>
    /// Reads every "Cust.*.csv.gz" file found in <paramref name="folderName"/> (in file-name order)
    /// and returns the customers with sequential ids starting at 0.
    /// </summary>
    /// <param name="folderName">Folder containing the customer files and "UKPostcodes.csv".</param>
    /// <param name="geoAreas">Geo areas; those with both a country code and a state code are used for the lookup.</param>
    /// <returns>All customers, in read order.</returns>
    /// <exception cref="KeyNotFoundException">A UK postcode or a country code has no lookup entry.</exception>
    /// <exception cref="Exception">No geo area matches the customer's country/state.</exception>
    public static List<Customer> LoadCustomersFromGZCSV(string folderName, List<GeoArea> geoAreas)
    {
        var countryToContinent = new Dictionary<string, string>()
        {
            { "DE" , "Europe" },
            { "GB" , "Europe" },
            { "IT" , "Europe" },
            { "NL" , "Europe" },
            { "AU" , "Australia" },
            { "CA" , "North America" },
            { "FR" , "Europe" },
            { "US" , "North America" }
        };

        // The lookup file is now closed as soon as the dictionary is built.
        Dictionary<string, UKPostCode> ukPostCodesLookUp;
        using (var ukReader = new StreamReader(Path.Combine(folderName, "UKPostcodes.csv")))
        using (var ukCsv = new CsvReader(ukReader, CultureInfo.InvariantCulture))
        {
            ukPostCodesLookUp = ukCsv.GetRecords<UKPostCode>().ToDictionary(x => x.Postcode);
        }

        var geoAreasLookUpDict = geoAreas.Where(x => x.CountryCode != null && x.StateCode != null)
                                         .ToDictionary(x => x.CountryCode.Trim() + x.StateCode.Trim());

        var allCustomers = new List<Customer>();

        int currentCustomerID = 0;

        Logger.Info($"csvgz FolderName: {folderName}");

        var csvGzFileList = Directory.GetFiles(folderName, "Cust.*.csv.gz").OrderBy(x => x).ToList();

        foreach (var csvGzFileName in csvGzFileList)
        {
            Logger.Info($"Processing: {Path.GetFileName(csvGzFileName)}");

            string csvBody = GzUnCompressToString(csvGzFileName);

            using (var csv = new CsvReader(new StringReader(csvBody), CultureInfo.InvariantCulture))
            {
                csv.Context.RegisterClassMap<CustomerCSVGZMapping>();

                var customers = csv.GetRecords<Customer>().ToList();

                // Remove the not managed "US - Northern Mariana Islands"
                customers.RemoveAll(c => c.Country == "US" && c.State == "MP");

                foreach (var cust in customers)
                {
                    // The id read from the "Number" column is replaced by a global sequence.
                    cust.CustomerID = currentCustomerID++;

                    // Correct City name if all uppercase
                    cust.City = NormalizeLettersCases(cust.City);

                    // update UK [State] from zip codes
                    if (cust.CountryFull == "United Kingdom")
                    {
                        string zipCodeFirstPart = cust.ZipCode.Split(' ')[0];
                        // Two outward codes are remapped to neighbouring ones before the lookup.
                        if (zipCodeFirstPart == "JE1") zipCodeFirstPart = "JE2";
                        if (zipCodeFirstPart == "CR1") zipCodeFirstPart = "CR0";

                        if (!ukPostCodesLookUp.TryGetValue(zipCodeFirstPart, out var ukPostCode))
                            throw new KeyNotFoundException($"UK postcode lookup failed for [{zipCodeFirstPart}] (zip code '{cust.ZipCode}')");

                        cust.State = cust.StateFull = ukPostCode.Region;
                    }

                    // fill [Continent]
                    if (!countryToContinent.TryGetValue(cust.Country, out var continent))
                        throw new KeyNotFoundException($"Continent lookup failed for country [{cust.Country}]");
                    cust.Continent = continent;

                    // fill [GeoAreaID]
                    var geoArea = geoAreasLookUpDict.GetValueOrDefault(cust.Country.Trim() + cust.State.Trim());
                    if (geoArea == null)
                        throw new Exception($"GeoArea lookup failed for [{cust.Country}-{cust.State}]");
                    cust.GeoAreaID = geoArea.GeoAreaID;
                }

                allCustomers.AddRange(customers);
            }
        }

        Logger.Info($"CustomerAll.csv - total items: {allCustomers.Count}");

        return allCustomers;
    }


    /// <summary>
    /// Decompresses a gzip file and decodes its content as Windows-1252 text.
    /// </summary>
    /// <param name="fileName">Path of the .gz file.</param>
    /// <returns>The decompressed text.</returns>
    private static string GzUnCompressToString(string fileName)
    {
        // Code page 1252 is only available on .NET (Core) after the code-pages provider
        // has been registered; registering here removes the dependency on call order.
        Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);

        using (var inStream = File.OpenRead(fileName))
        using (var gzip = new GZipStream(inStream, CompressionMode.Decompress, false))
        using (var outStream = new MemoryStream())
        {
            gzip.CopyTo(outStream);

            // Decode straight from the stream's buffer: avoids copying the whole payload.
            return Encoding.GetEncoding(1252).GetString(outStream.GetBuffer(), 0, (int)outStream.Length);
        }
    }


    /// <summary>CsvHelper mapping of the customer CSV columns; the id is read from the "Number" column.</summary>
    public class CustomerCSVGZMapping : ClassMap<Customer>
    {
        public CustomerCSVGZMapping()
        {
            Map(m => m.CustomerID).Name("Number");
            Map(m => m.Gender);
            Map(m => m.Title);
            Map(m => m.GivenName);
            Map(m => m.MiddleInitial);
            Map(m => m.Surname);
            Map(m => m.StreetAddress);
            Map(m => m.City);
            Map(m => m.State);
            Map(m => m.StateFull);
            Map(m => m.ZipCode);
            Map(m => m.Country);
            Map(m => m.CountryFull);
            Map(m => m.Birthday);
            Map(m => m.Age);
            Map(m => m.Occupation);
            Map(m => m.Company);
            Map(m => m.Vehicle);
            Map(m => m.Latitude);
            Map(m => m.Longitude);
        }
    }


    /// <summary>
    /// When every letter of <paramref name="s"/> is uppercase, lowers each character that
    /// directly follows a letter (e.g. "NEW YORK" becomes "New York"); otherwise returns
    /// the text unchanged.
    /// </summary>
    /// <param name="s">Text to normalize; null or empty is returned as is.</param>
    private static string NormalizeLettersCases(string s)
    {
        if (string.IsNullOrEmpty(s))
        {
            return s;
        }

        if (s.All(c => !Char.IsLetter(c) || Char.IsUpper(c)))
        {
            var newName = new Char[s.Length];
            bool inWord = false;
            for (int i = 0; i < s.Length; i++)
            {
                newName[i] = inWord ? Char.ToLower(s[i]) : s[i];
                inWord = Char.IsLetter(s[i]);
            }
            s = new string(newName);
        }

        return s;
    }

}
/// <summary>
/// Reads sheets of the Excel data file and converts their rows to model objects.
/// </summary>
internal class ExcelReader
{

    /// <summary>
    /// Reads one sheet and creates a <typeparamref name="T"/> per row, filling the
    /// members that correspond to the model type(s) the object belongs to.
    /// </summary>
    /// <param name="fileName">Path of the Excel file.</param>
    /// <param name="sheetName">Name of the sheet to read.</param>
    /// <returns>One object per sheet row, in sheet order.</returns>
    /// <exception cref="ApplicationException">The sheet does not exist.</exception>
    public static List<T> Read<T>(string fileName, string sheetName) where T : new()
    {
        DataTable dt = ExcelReader.ReadExcelAsDataSet(fileName).Tables[sheetName];
        if (dt == null)
            throw new ApplicationException($"Cannot find sheet [{sheetName}] in excel file");

        TrimStringFields(dt);

        var allColumns = dt.Columns.Cast<DataColumn>().ToList();
        var weightsColumns = allColumns.Where(c => c.ColumnName.StartsWith("W_"));

        // Numeric cells are boxed doubles: unbox first, then narrow to int.
        int AsInt(DataRow r, string columnName) => (int)(double)r[columnName];

        var outList = new List<T>();

        foreach (DataRow row in dt.Rows)
        {
            T obj = new T();
            outList.Add(obj);

            // The checks below are independent on purpose: an object may match more than one of them.

            if (obj is WeightedItem weighted)
            {
                // every "W_*" column, in column order
                weighted.Weights = weightsColumns.Select(c => (double)row[c]).ToArray();
            }

            if (obj is Category cat)
            {
                cat.CategoryID = AsInt(row, "CategoryID");
                cat.CategoryName = (string)row["CategoryName"];
                // every "PPC_*" column, in column order
                cat.PricePerCent = allColumns.Where(c => c.ColumnName.StartsWith("PPC_")).Select(name => (double)row[name]).ToArray();
            }

            if (obj is SubCategory subCat)
            {
                subCat.SubCategoryID = AsInt(row, "SubCategoryID");
                subCat.CategoryID = AsInt(row, "CategoryID");
                subCat.SubCategoryName = (string)row["SubCategoryName"];
            }

            if (obj is Product prod)
            {
                prod.ProductID = AsInt(row, "ProductID");
                prod.ProductName = (string)row["ProductName"];
                prod.SubCategoryID = AsInt(row, "SubCategoryID");
                prod.Price = (double)row["Price"];
                prod.Cost = (double)row["Cost"];
                prod.ProductCode = (string)row["ProductCode"];
                prod.Manufacturer = (string)row["Manufacturer"];
                prod.Brand = (string)row["Brand"];
                prod.Color = (string)row["Color"];
                prod.WeightUnit = (string)row["WeightUnit"];
                prod.Weight = (row["Weight"] != DBNull.Value) ? (decimal)(double)row["Weight"] : null;
            }

            if (obj is CustomerCluster customerCluster)
            {
                customerCluster.ClusterID = AsInt(row, "ClusterID");
                customerCluster.OrdersWeight = (double)row["OrdersWeight"];
                customerCluster.CustomersWeight = (double)row["CustomersWeight"];
            }

            if (obj is GeoArea geoArea)
            {
                geoArea.GeoAreaID = AsInt(row, "GeoAreaID");
                // GeoAreaID -1 gets placeholder codes instead of the sheet values.
                bool isPlaceholder = geoArea.GeoAreaID == -1;
                geoArea.CountryCode = isPlaceholder ? "--" : (string)row["CountryCode"];
                geoArea.Country = (string)row["Country"];
                geoArea.StateCode = isPlaceholder ? "--" : (string)row["StateCode"];
                geoArea.StateLongName = (string)row["StateLongName"];
            }

            if (obj is Store store)
            {
                store.StoreID = AsInt(row, "StoreID");
                store.GeoAreaID = AsInt(row, "GeoAreaID");
                // Dates are optional: anything that is not a DateTime cell is read as null.
                store.OpenDate = row["OpenDate"] is DateTime ? (DateTime?)row["OpenDate"] : null;
                store.CloseDate = row["CloseDate"] is DateTime ? (DateTime?)row["CloseDate"] : null;
                store.StoreCode = AsInt(row, "StoreCode");
                store.Description = (string)row["Description"];
                store.SquareMeters = row["SquareMeters"] != System.DBNull.Value ? (int)(double)row["SquareMeters"] : null;
                store.Status = row["Status"] != System.DBNull.Value ? (string)row["Status"] : null;
            }

            if (obj is SubCategoryLink scLink)
            {
                scLink.SubCategoryID = AsInt(row, "SubCategoryID");
                scLink.LinkedSubCategoryID = AsInt(row, "LinkedSubCategoryID");
                scLink.PerCent = (double)row["PerCent"];
            }
        }

        return outList;
    }


    /// <summary>
    /// Trims, in place, every non-null value of the table's string-typed columns.
    /// </summary>
    private static void TrimStringFields(DataTable dt)
    {
        var stringColumns = dt.Columns.Cast<DataColumn>().Where(c => c.DataType == typeof(string));

        foreach (DataColumn column in stringColumns)
        {
            foreach (DataRow row in dt.Rows)
            {
                if (row[column] == DBNull.Value)
                {
                    continue;
                }

                row[column] = ((string)row[column]).Trim();
            }
        }
    }


    /// <summary>
    /// Loads the whole workbook into a <see cref="DataSet"/>, using the first row of each sheet as header.
    /// </summary>
    private static DataSet ReadExcelAsDataSet(string fileName)
    {
        // https://github.com/ExcelDataReader/ExcelDataReader

        // Required for running on .NET Core
        System.Text.Encoding.RegisterProvider(System.Text.CodePagesEncodingProvider.Instance);

        var config = new ExcelDataSetConfiguration()
        {
            ConfigureDataTable = (tableReader) => new ExcelDataTableConfiguration()
            {
                UseHeaderRow = true
            }
        };

        using var stream = File.Open(fileName, FileMode.Open, FileAccess.Read);
        using var reader = ExcelReaderFactory.CreateReader(stream);

        return reader.AsDataSet(config);
    }

}
| *.ipdb 81 | *.pgc 82 | *.pgd 83 | *.rsp 84 | *.sbr 85 | *.tlb 86 | *.tli 87 | *.tlh 88 | *.tmp 89 | *.tmp_proj 90 | *_wpftmp.csproj 91 | *.log 92 | *.vspscc 93 | *.vssscc 94 | .builds 95 | *.pidb 96 | *.svclog 97 | *.scc 98 | 99 | # Chutzpah Test files 100 | _Chutzpah* 101 | 102 | # Visual C++ cache files 103 | ipch/ 104 | *.aps 105 | *.ncb 106 | *.opendb 107 | *.opensdf 108 | *.sdf 109 | *.cachefile 110 | *.VC.db 111 | *.VC.VC.opendb 112 | 113 | # Visual Studio profiler 114 | *.psess 115 | *.vsp 116 | *.vspx 117 | *.sap 118 | 119 | # Visual Studio Trace Files 120 | *.e2e 121 | 122 | # TFS 2012 Local Workspace 123 | $tf/ 124 | 125 | # Guidance Automation Toolkit 126 | *.gpState 127 | 128 | # ReSharper is a .NET coding add-in 129 | _ReSharper*/ 130 | *.[Rr]e[Ss]harper 131 | *.DotSettings.user 132 | 133 | # JustCode is a .NET coding add-in 134 | .JustCode 135 | 136 | # TeamCity is a build add-in 137 | _TeamCity* 138 | 139 | # DotCover is a Code Coverage Tool 140 | *.dotCover 141 | 142 | # AxoCover is a Code Coverage Tool 143 | .axoCover/* 144 | !.axoCover/settings.json 145 | 146 | # Visual Studio code coverage results 147 | *.coverage 148 | *.coveragexml 149 | 150 | # NCrunch 151 | _NCrunch_* 152 | .*crunch*.local.xml 153 | nCrunchTemp_* 154 | 155 | # MightyMoose 156 | *.mm.* 157 | AutoTest.Net/ 158 | 159 | # Web workbench (sass) 160 | .sass-cache/ 161 | 162 | # Installshield output folder 163 | [Ee]xpress/ 164 | 165 | # DocProject is a documentation generator add-in 166 | DocProject/buildhelp/ 167 | DocProject/Help/*.HxT 168 | DocProject/Help/*.HxC 169 | DocProject/Help/*.hhc 170 | DocProject/Help/*.hhk 171 | DocProject/Help/*.hhp 172 | DocProject/Help/Html2 173 | DocProject/Help/html 174 | 175 | # Click-Once directory 176 | publish/ 177 | 178 | # Publish Web Output 179 | *.[Pp]ublish.xml 180 | *.azurePubxml 181 | # Note: Comment the next line if you want to checkin your web deploy settings, 182 | # but database connection strings (with potential passwords) will 
be unencrypted 183 | *.pubxml 184 | *.publishproj 185 | !FolderProfile.pubxml 186 | 187 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 188 | # checkin your Azure Web App publish settings, but sensitive information contained 189 | # in these scripts will be unencrypted 190 | PublishScripts/ 191 | 192 | # NuGet Packages 193 | *.nupkg 194 | # The packages folder can be ignored because of Package Restore 195 | **/[Pp]ackages/* 196 | # except build/, which is used as an MSBuild target. 197 | !**/[Pp]ackages/build/ 198 | # Uncomment if necessary however generally it will be regenerated when needed 199 | #!**/[Pp]ackages/repositories.config 200 | # NuGet v3's project.json files produces more ignorable files 201 | *.nuget.props 202 | *.nuget.targets 203 | 204 | # Microsoft Azure Build Output 205 | csx/ 206 | *.build.csdef 207 | 208 | # Microsoft Azure Emulator 209 | ecf/ 210 | rcf/ 211 | 212 | # Windows Store app package directories and files 213 | AppPackages/ 214 | BundleArtifacts/ 215 | Package.StoreAssociation.xml 216 | _pkginfo.txt 217 | *.appx 218 | 219 | # Visual Studio cache files 220 | # files ending in .cache can be ignored 221 | *.[Cc]ache 222 | # but keep track of directories ending in .cache 223 | !?*.[Cc]ache/ 224 | 225 | # Others 226 | ClientBin/ 227 | ~$* 228 | *~ 229 | *.dbmdl 230 | *.dbproj.schemaview 231 | *.jfm 232 | *.pfx 233 | *.publishsettings 234 | orleans.codegen.cs 235 | 236 | # Including strong name files can present a security risk 237 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 238 | #*.snk 239 | 240 | # Since there are multiple workflows, uncomment next line to ignore bower_components 241 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 242 | #bower_components/ 243 | 244 | # RIA/Silverlight projects 245 | Generated_Code/ 246 | 247 | # Backup & report files from converting an old project file 248 | # to a newer Visual Studio version. 
Backup files are not needed, 249 | # because we have git ;-) 250 | _UpgradeReport_Files/ 251 | Backup*/ 252 | UpgradeLog*.XML 253 | UpgradeLog*.htm 254 | ServiceFabricBackup/ 255 | *.rptproj.bak 256 | 257 | # SQL Server files 258 | *.mdf 259 | *.ldf 260 | *.ndf 261 | 262 | # Business Intelligence projects 263 | *.rdl.data 264 | *.bim.layout 265 | *.bim_*.settings 266 | *.rptproj.rsuser 267 | *- Backup*.rdl 268 | 269 | # Microsoft Fakes 270 | FakesAssemblies/ 271 | 272 | # GhostDoc plugin setting file 273 | *.GhostDoc.xml 274 | 275 | # Node.js Tools for Visual Studio 276 | .ntvs_analysis.dat 277 | node_modules/ 278 | 279 | # Visual Studio 6 build log 280 | *.plg 281 | 282 | # Visual Studio 6 workspace options file 283 | *.opt 284 | 285 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 286 | *.vbw 287 | 288 | # Visual Studio LightSwitch build output 289 | **/*.HTMLClient/GeneratedArtifacts 290 | **/*.DesktopClient/GeneratedArtifacts 291 | **/*.DesktopClient/ModelManifest.xml 292 | **/*.Server/GeneratedArtifacts 293 | **/*.Server/ModelManifest.xml 294 | _Pvt_Extensions 295 | 296 | # Paket dependency manager 297 | .paket/paket.exe 298 | paket-files/ 299 | 300 | # FAKE - F# Make 301 | .fake/ 302 | 303 | # JetBrains Rider 304 | .idea/ 305 | *.sln.iml 306 | 307 | # CodeRush personal settings 308 | .cr/personal 309 | 310 | # Python Tools for Visual Studio (PTVS) 311 | __pycache__/ 312 | *.pyc 313 | 314 | # Cake - Uncomment if you are using it 315 | # tools/** 316 | # !tools/packages.config 317 | 318 | # Tabs Studio 319 | *.tss 320 | 321 | # Telerik's JustMock configuration file 322 | *.jmconfig 323 | 324 | # BizTalk build output 325 | *.btp.cs 326 | *.btm.cs 327 | *.odx.cs 328 | *.xsd.cs 329 | 330 | # OpenCover UI analysis results 331 | OpenCover/ 332 | 333 | # Azure Stream Analytics local run output 334 | ASALocalRun/ 335 | 336 | # MSBuild Binary and Structured Log 337 | *.binlog 338 | 339 | # NVidia Nsight GPU debugger configuration 
-- Creates the common (dimension) tables of the [Data] schema and the
-- friendly-named dbo views on top of them. Existing tables are dropped first,
-- so running this script discards any data they contain.
PRINT 'SQL > Creating common tables'

--
-- Create the Data schema if it does not exist
--
IF NOT EXISTS (SELECT * FROM sys.schemas WHERE name = 'Data')
BEGIN
    -- CREATE SCHEMA must be the only statement in its batch,
    -- so it is executed through dynamic SQL.
    EXEC( 'CREATE SCHEMA Data' );
END

-- Drop every table of the schema (fact tables included) before re-creating the common ones.
DROP TABLE IF EXISTS [Data].[OrderRows];
DROP TABLE IF EXISTS [Data].[Orders];
DROP TABLE IF EXISTS [Data].[Sales];
DROP TABLE IF EXISTS [Data].[Customer];
DROP TABLE IF EXISTS [Data].[Product];
DROP TABLE IF EXISTS [Data].[Store];
DROP TABLE IF EXISTS [Data].[CurrencyExchange];
DROP TABLE IF EXISTS [Data].[Date];


-- Exchange rates, one row per (Date, FromCurrency, ToCurrency).
CREATE TABLE [Data].[CurrencyExchange](
    [Date] [date] NOT NULL,
    [FromCurrency] [nchar](3) NOT NULL,
    [ToCurrency] [nchar](3) NOT NULL,
    [Exchange] [float] NOT NULL,
    CONSTRAINT [PK_CurrencyExchange] PRIMARY KEY CLUSTERED ( [Date] ASC, [FromCurrency] ASC, [ToCurrency] ASC ) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY]


-- Customers, keyed by CustomerKey.
CREATE TABLE [Data].[Customer](
    [CustomerKey] [int] NOT NULL,
    [GeoAreaKey] [int] NOT NULL,
    [StartDT] [Date] NULL,
    [EndDT] [Date] NULL,
    [Continent] [nvarchar](50) NULL,
    [Gender] [nvarchar](10) NULL,
    [Title] [nvarchar](50) NULL,
    [GivenName] [nvarchar](150) NULL,
    [MiddleInitial] [nvarchar](150) NULL,
    [Surname] [nvarchar](150) NULL,
    [StreetAddress] [nvarchar](150) NULL,
    [City] [nvarchar](50) NULL,
    [State] [nvarchar](50) NULL,
    [StateFull] [nvarchar](50) NULL,
    [ZipCode] [nvarchar](50) NULL,
    [Country] [nvarchar](50) NULL,
    [CountryFull] [nvarchar](50) NULL,
    [Birthday] [Date] NOT NULL,
    [Age] [int] NOT NULL,
    [Occupation] [nvarchar](100) NULL,
    [Company] [nvarchar](50) NULL,
    [Vehicle] [nvarchar](50) NULL,
    [Latitude] [float] NOT NULL,
    [Longitude] [float] NOT NULL,
    CONSTRAINT [PK_Customer] PRIMARY KEY CLUSTERED ( [CustomerKey] ASC ) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY]


-- Calendar table. Note that the clustered primary key is the textual DateKey, not the Date column.
CREATE TABLE [Data].[Date](
    [Date] [Date] NOT NULL ,
    [DateKey] [nvarchar](50) NOT NULL,
    [Year] [int] NOT NULL ,
    [YearQuarter] [nvarchar](30) NOT NULL,
    [YearQuarterNumber] [int] NOT NULL,
    [Quarter] [nvarchar](2) NOT NULL,
    [YearMonth] [nvarchar](30) NOT NULL,
    [YearMonthShort] [nvarchar](30) NOT NULL,
    [YearMonthNumber] [int] NOT NULL,
    [Month] [nvarchar](30) NOT NULL,
    [MonthShort] [nvarchar](30) NOT NULL,
    [MonthNumber] [int] NOT NULL ,
    [DayofWeek] [nvarchar](30) NOT NULL,
    [DayofWeekShort] [nvarchar](30) NOT NULL,
    [DayofWeekNumber] [int] NOT NULL,
    [WorkingDay] [bit] NOT NULL ,
    [WorkingDayNumber] [int] NOT NULL,
    CONSTRAINT [PK_Date] PRIMARY KEY CLUSTERED ( [DateKey] ASC )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY]


-- Products, with category and subcategory attributes stored on the same row.
-- NOTE(review): the constraint is named PK_TestProduct while the other tables use PK_<Table>;
-- looks like a leftover name - confirm nothing references it before renaming.
CREATE TABLE [Data].[Product](
    [ProductKey] [int] NOT NULL,
    [ProductCode] [nvarchar](255) NOT NULL,
    [ProductName] [nvarchar](500) NULL,
    [Manufacturer] [nvarchar](50) NULL,
    [Brand] [nvarchar](50) NULL,
    [Color] [nvarchar](20) NULL,
    [WeightUnit] [nvarchar](20) NULL,
    [Weight] [float] NULL,
    [Cost] [money] NULL,
    [Price] [money] NULL,
    [CategoryKey] [int] NULL,
    [CategoryName] [nvarchar](30) NULL,
    [SubCategoryKey] [int] NULL,
    [SubCategoryName] [nvarchar](50) NULL,
    CONSTRAINT [PK_TestProduct] PRIMARY KEY CLUSTERED ( [ProductKey] ASC ) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY]


-- Stores, keyed by StoreKey.
CREATE TABLE [Data].[Store] (
    [StoreKey] [int] NOT NULL,
    [StoreCode] [int] NOT NULL,
    [GeoAreaKey] [int] NOT NULL,
    [CountryCode] [nvarchar](50) NULL,
    [CountryName] [nvarchar](50) NULL,
    [State] [nvarchar](100) NULL,
    [OpenDate] [Date] NULL,
    [CloseDate] [Date] NULL,
    [Description] [nvarchar](100) NULL,
    [SquareMeters] [int] NULL,
    [Status] [nvarchar](50) NULL,
    CONSTRAINT [PK_Store] PRIMARY KEY CLUSTERED ( [StoreKey] ASC )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY]

GO

------------------------------------------------------------------------------------------
--
-- Views
--
-- Each view exposes a [Data] table in the dbo schema with user-friendly
-- column names; only a subset of the table columns is projected.
--
------------------------------------------------------------------------------------------

-- Customer view.
-- NOTE(review): GivenName and Surname are nullable, so with the default
-- CONCAT_NULL_YIELDS_NULL setting [Name] is NULL when either part is NULL - confirm this is intended.
CREATE OR ALTER VIEW dbo.Customer AS
SELECT
    [CustomerKey],
    [Gender],
    [GivenName] + ' ' + [Surname] AS [Name],
    [StreetAddress] AS [Address],
    [City],
    [State] AS [State Code],
    StateFull AS [State],
    ZipCode AS [Zip Code],
    [Country] as [Country Code],
    [CountryFull] as [Country],
    [Continent],
    [Birthday],
    [Age] as [Age]
FROM
    [Data].Customer
GO


-- Product view: the category/subcategory keys are exposed as "Code" columns.
CREATE OR ALTER VIEW dbo.Product AS
SELECT
    ProductKey,
    ProductCode AS [Product Code],
    ProductName AS [Product Name],
    [Manufacturer],
    [Brand],
    [Color],
    WeightUnit AS [Weight Unit Measure],
    [Weight],
    Cost AS [Unit Cost],
    Price AS [Unit Price],
    SubcategoryKey AS [Subcategory Code],
    SubCategoryName AS [Subcategory],
    CategoryKey AS [Category Code],
    CategoryName AS [Category]
FROM [Data].Product

GO

-- Currency exchange view: straight projection of the underlying table.
CREATE OR ALTER VIEW dbo.[Currency Exchange] AS
SELECT
    [Date],
    [FromCurrency],
    [ToCurrency],
    [Exchange]
FROM [Data].[CurrencyExchange]
GO


-- Store view: the store Description column is exposed as [Name].
CREATE OR ALTER VIEW dbo.Store AS
SELECT
    StoreKey,
    StoreCode AS [Store Code],
    CountryName AS [Country],
    [State],
    [Description] AS [Name],
    SquareMeters AS [Square Meters],
    OpenDate AS [Open Date],
    CloseDate AS [Close Date],
    [Status]
FROM
    [Data].Store
GO

-- Date view.
CREATE OR ALTER VIEW dbo.Date AS
SELECT
    [Date],
    -- DateKey is intentionally not exposed by the view
    [Year],
    YearQuarter AS [Year Quarter],
    YearQuarterNumber AS [Year Quarter Number],
    [Quarter],
    YearMonth AS [Year Month],
    YearMonthShort AS [Year Month Short],
    YearMonthNumber AS [Year Month Number],
    [Month],
    MonthShort AS [Month Short],
    MonthNumber AS [Month Number],
    [DayofWeek] AS [Day of Week],
    DayofWeekShort AS [Day of Week Short],
    DayofWeekNumber AS [Day of Week Number],
    WorkingDay AS [Working Day],
    WorkingDayNumber AS [Working Day Number]
FROM
    [Data].[Date]
GO
namespace DatabaseGenerator.DataWriter
{

    /// <summary>
    /// <see cref="IDataWriter"/> that writes the generated data as Parquet files and,
    /// when the configured output format is "DELTATABLE", arranges them as Delta tables
    /// (one folder per table containing the parquet files and a "_delta_log" folder).
    /// </summary>
    /// <remarks>
    /// Orders are buffered in memory and flushed as one append ("row group") every
    /// <c>ParquetOrdersRowGroupSize</c> orders. In Delta mode a new set of files is
    /// started every <c>DeltaTableOrdersPerFile</c> orders; otherwise a single file
    /// per table is produced.
    /// </remarks>
    public class ParquetWriter : IDataWriter
    {

        // config
        private string _outputFolder;
        private bool _isDeltaTableOutput;
        private long _maxOrdersPerFile;
        private int _parquetOrdersRowGroupSize;
        private SOOutput _SOOutput;

        // running
        private int _fileCounter;               // counter of generated files
        private int _rowGroupCounter;           // counter of generated parquet "row groups"
        private int _currentFileOrderCounter;   // number of orders of the current file
        private bool _appendToCurrentFile;      // "false" if the current file is new
        private List<Order> _memoryBufferOrders;
        private List<Sale> _memoryBufferSales;
        private List<string> _ordersFiles;
        private List<string> _orderRowsFiles;
        private List<string> _salesFiles;

        // logging
        private void Log(string message) { Logger.Info($"Parquet : {message}"); }


        /// <summary>Reads the writer settings from the configuration.</summary>
        /// <param name="config">Generator configuration.</param>
        /// <param name="outputFolder">Folder that receives every generated file.</param>
        public ParquetWriter(Config config, string outputFolder)
        {
            _SOOutput = config.SalesOrdersOut;

            // 0 means "no limit" (kept as in the original: the value is written back to the config)
            if (config.DeltaTableOrdersPerFile == 0) config.DeltaTableOrdersPerFile = int.MaxValue;

            _isDeltaTableOutput = (config.OutputFormat == "DELTATABLE");
            _outputFolder = outputFolder;
            _maxOrdersPerFile = _isDeltaTableOutput ? config.DeltaTableOrdersPerFile : long.MaxValue;
            _parquetOrdersRowGroupSize = config.ParquetOrdersRowGroupSize;

            // A negative limit would start a new file on every single order: treat it as "no limit"
            if (_maxOrdersPerFile < 1) _maxOrdersPerFile = long.MaxValue;

            if (_parquetOrdersRowGroupSize < 1) _parquetOrdersRowGroupSize = 500 * 1000;
        }


        /// <summary>Resets the running state; must be called before the first order is written.</summary>
        public void Init()
        {
            _fileCounter = 0;
            _rowGroupCounter = 0;
            _currentFileOrderCounter = 0;
            _appendToCurrentFile = false;
            _memoryBufferOrders = new List<Order>();
            _memoryBufferSales = new List<Sale>();
            _ordersFiles = new List<string>();
            _orderRowsFiles = new List<string>();
            _salesFiles = new List<string>();
        }


        /// <summary>
        /// Buffers one order (and its sales rows) and flushes the buffer to disk when the
        /// row-group size or the per-file order limit is reached.
        /// </summary>
        /// <param name="order">Order to write; its rows are read from <c>order.Rows</c>.</param>
        /// <param name="sales">Sales rows of the order; used only when sales output is enabled.</param>
        public async Task WriteOrderWithRows(Order order, IEnumerable<Sale> sales)
        {
            _memoryBufferOrders.Add(order);

            // The sales buffer is consumed only when sales output is enabled
            if (_SOOutput.WriteSales && sales != null)
            {
                _memoryBufferSales.AddRange(sales);
            }

            _currentFileOrderCounter++;

            bool fileIsFull = _currentFileOrderCounter >= _maxOrdersPerFile;

            if (fileIsFull || _memoryBufferOrders.Count >= _parquetOrdersRowGroupSize)
            {
                await WriteParquetFiles();
                _memoryBufferOrders.Clear();
                _memoryBufferSales.Clear();
            }

            if (fileIsFull)
            {
                Logger.Info("New parquet file");
                _fileCounter++;
                _currentFileOrderCounter = 0;
                _appendToCurrentFile = false;
            }
        }


        /// <summary>Writes one parquet file per static table into the output folder.</summary>
        public async Task WriteStaticData(IEnumerable<Customer> customers,
                                          IEnumerable<Store> stores,
                                          IEnumerable<Product> products,
                                          IEnumerable<DateExtended> dates,
                                          IEnumerable<CurrencyExchange> currencyExchanges)
        {
            var parOpt = new ParquetSerializerOptions() { Append = false };

            Log("Customers");
            var parquetCustomers = customers.OrderBy(x => x.CustomerID).Select(x => ParquetCustomer.FromCustomer(x));
            await ParquetSerializer.SerializeAsync(parquetCustomers, Path.Combine(_outputFolder, $"{Consts.CUSTOMERS}.parquet"), parOpt);

            Log("Stores");
            var parquetStores = stores.Select(x => ParquetStore.FromStore(x));
            await ParquetSerializer.SerializeAsync(parquetStores, Path.Combine(_outputFolder, $"{Consts.STORES}.parquet"), parOpt);

            Log("Products");
            var parquetProducts = products.Select(x => ParquetProduct.FromProduct(x));
            await ParquetSerializer.SerializeAsync(parquetProducts, Path.Combine(_outputFolder, $"{Consts.PRODUCTS}.parquet"), parOpt);

            Log("Dates");
            var parquetDates = dates.Select(x => ParquetDateExtended.FromDateExtended(x));
            await ParquetSerializer.SerializeAsync(parquetDates, Path.Combine(_outputFolder, $"{Consts.DATES}.parquet"), parOpt);

            Log("CurrencyExchange");
            var parquetCurrExchanges = currencyExchanges.Select(x => ParquetCurrencyExchange.FromCurrencyExchange(x));
            await ParquetSerializer.SerializeAsync(parquetCurrExchanges, Path.Combine(_outputFolder, $"{Consts.CURREXCHS}.parquet"), parOpt);
        }


        /// <summary>
        /// Flushes the pending orders, then either converts every table to a Delta table
        /// or renames the single "*_0.parquet" files to "*.parquet".
        /// </summary>
        public void Close()
        {
            if (_memoryBufferOrders.Count > 0)
            {
                // Close() is synchronous, so the pending write is waited for here
                WriteParquetFiles().Wait();
                _memoryBufferOrders.Clear();
                _memoryBufferSales.Clear();
            }

            if (_isDeltaTableOutput)
            {
                Log($"Delta Table conversion");

                if (_SOOutput.WriteOrders) WriteDeltaTableFiles(_ordersFiles, $"{Consts.ORDERS}", CreateDeltaLogBody(_ordersFiles, ParquetOrder.GETDeltaSchema()));
                if (_SOOutput.WriteOrders) WriteDeltaTableFiles(_orderRowsFiles, $"{Consts.ORDERROWS}", CreateDeltaLogBody(_orderRowsFiles, ParquetOrderRow.GETDeltaSchema()));
                if (_SOOutput.WriteSales) WriteDeltaTableFiles(_salesFiles, $"{Consts.SALES}", CreateDeltaLogBody(_salesFiles, ParquetSale.GETDeltaSchema()));

                WriteDeltaTableFiles($"{Consts.CUSTOMERS}.parquet", $"{Consts.CUSTOMERS}", CreateDeltaLogBody($"{Consts.CUSTOMERS}.parquet", ParquetCustomer.GETDeltaSchema()));
                WriteDeltaTableFiles($"{Consts.STORES}.parquet", $"{Consts.STORES}", CreateDeltaLogBody($"{Consts.STORES}.parquet", ParquetStore.GETDeltaSchema()));
                WriteDeltaTableFiles($"{Consts.PRODUCTS}.parquet", $"{Consts.PRODUCTS}", CreateDeltaLogBody($"{Consts.PRODUCTS}.parquet", ParquetProduct.GETDeltaSchema()));
                WriteDeltaTableFiles($"{Consts.DATES}.parquet", $"{Consts.DATES}", CreateDeltaLogBody($"{Consts.DATES}.parquet", ParquetDateExtended.GETDeltaSchema()));
                WriteDeltaTableFiles($"{Consts.CURREXCHS}.parquet", $"{Consts.CURREXCHS}", CreateDeltaLogBody($"{Consts.CURREXCHS}.parquet", ParquetCurrencyExchange.GETDeltaSchema()));
            }
            else
            {
                // rename parquet files
                if (_SOOutput.WriteOrders)
                {
                    RemoveFileCounterSuffix(_ordersFiles);
                    RemoveFileCounterSuffix(_orderRowsFiles);
                }
                if (_SOOutput.WriteSales)
                {
                    RemoveFileCounterSuffix(_salesFiles);
                }
            }
        }


        /// <summary>
        /// Renames the first file of the list from "name_0.parquet" to "name.parquet".
        /// Does nothing when the list is empty, i.e. when no order has been written.
        /// </summary>
        private void RemoveFileCounterSuffix(List<string> parquetFileList)
        {
            if (parquetFileList.Count == 0) return;

            string currentName = parquetFileList[0];
            string finalName = currentName.Replace("_0.parquet", ".parquet");
            File.Move(Path.Combine(_outputFolder, currentName), Path.Combine(_outputFolder, finalName));
        }


        /// <summary>Builds the delta log body for a table made of a single parquet file.</summary>
        public string CreateDeltaLogBody(string parquetFileName, string deltaSchema)
        {
            return CreateDeltaLogBody(new List<string> { parquetFileName }, deltaSchema);
        }


        /// <summary>
        /// Builds the content of the first delta log file: a "protocol" line, a "metaData"
        /// line and one "add" line for each parquet file.
        /// </summary>
        /// <param name="parquetFileList">File names relative to the output folder; the files must exist, their size is read from disk.</param>
        /// <param name="deltaSchema">Schema string; double quotes are escaped before it is embedded in the metadata line.</param>
        /// <returns>The text of the log file, one JSON document per line.</returns>
        public string CreateDeltaLogBody(List<string> parquetFileList, string deltaSchema)
        {
            // Invariant culture: the numbers end up in a machine-readable JSON document
            var invariant = System.Globalization.CultureInfo.InvariantCulture;
            string unixnow = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds().ToString(invariant);

            var body = new StringBuilder();

            body.AppendLine("{\"protocol\":{\"minReaderVersion\":1,\"minWriterVersion\":2}}");
            body.AppendLine("{\"metaData\":{\"id\":\"###GUID###\",\"name\":null,\"description\":null,\"format\":{\"provider\":\"parquet\",\"options\":{}},\"schemaString\":\"###SCHEMA###\",\"partitionColumns\":[],\"createdTime\":###NOW###,\"configuration\":{}}}");

            foreach (var parquetFileName in parquetFileList)
            {
                long parquetFileSize = new FileInfo(Path.Combine(_outputFolder, parquetFileName)).Length;
                string addFileLine = "{\"add\":{\"path\":\"###FILENAME###\",\"size\":###SIZE###,\"partitionValues\":{},\"modificationTime\":###NOW###,\"dataChange\":true,\"stats\":null}}";
                addFileLine = addFileLine.Replace("###FILENAME###", parquetFileName);
                addFileLine = addFileLine.Replace("###SIZE###", parquetFileSize.ToString(invariant));
                addFileLine = addFileLine.Replace("###NOW###", unixnow);
                body.AppendLine(addFileLine);
            }

            // Placeholders of the metadata line
            body.Replace("###SCHEMA###", deltaSchema.Replace("\"", "\\\""));
            body.Replace("###NOW###", unixnow);
            body.Replace("###GUID###", Guid.NewGuid().ToString());

            return body.ToString();
        }


        /// <summary>Creates the delta table folder for a table made of a single parquet file.</summary>
        private void WriteDeltaTableFiles(string parquetFileName, string tableName, string deltaLogBody)
        {
            WriteDeltaTableFiles(new List<string> { parquetFileName }, tableName, deltaLogBody);
        }


        /// <summary>
        /// Creates "&lt;output&gt;/&lt;tableName&gt;/_delta_log", writes the log file and moves
        /// the parquet files from the output folder into the table folder.
        /// </summary>
        private void WriteDeltaTableFiles(List<string> parquetFileList, string tableName, string deltaLogBody)
        {
            string tableFolder = Path.Combine(_outputFolder, tableName);
            string deltaLogFolder = Path.Combine(tableFolder, "_delta_log");

            // Creating the nested folder creates the table folder as well
            Directory.CreateDirectory(deltaLogFolder);

            // log file
            File.WriteAllText(Path.Combine(deltaLogFolder, "00000000000000000000.json"), deltaLogBody);

            // parquet file
            foreach (var parquetFileName in parquetFileList)
            {
                File.Move(Path.Combine(_outputFolder, parquetFileName), Path.Combine(tableFolder, parquetFileName), false);
            }
        }


        /// <summary>
        /// Writes the buffered orders, order rows and sales to the current parquet files,
        /// appending when the current files already exist. The buffers are not cleared here.
        /// </summary>
        private async Task WriteParquetFiles()
        {
            // Check if there is something to be written
            if (_memoryBufferOrders.Count == 0) return;

            _rowGroupCounter++;

            Logger.Info($"Parquet file dump > CurrentFileID: {_fileCounter}   Orders: {_memoryBufferOrders.Count}  (TotalRowGroups: {_rowGroupCounter})");

            var parOpt = new ParquetSerializerOptions()
            {
                Append = _appendToCurrentFile,
            };

            if (_SOOutput.WriteOrders)
            {
                var parquetOrders = _memoryBufferOrders.Select(order => ParquetOrder.FromOrder(order));
                var parquetRows = _memoryBufferOrders.SelectMany(order => order.Rows.Select(row => ParquetOrderRow.FromOrderRow(order, row)));
                string ordersFileName = $"{Consts.ORDERS}_{_fileCounter}.parquet";
                string orderRowsFileName = $"{Consts.ORDERROWS}_{_fileCounter}.parquet";
                await ParquetSerializer.SerializeAsync(parquetOrders, Path.Combine(_outputFolder, ordersFileName), parOpt);
                await ParquetSerializer.SerializeAsync(parquetRows, Path.Combine(_outputFolder, orderRowsFileName), parOpt);
                if (!_ordersFiles.Contains(ordersFileName)) _ordersFiles.Add(ordersFileName);
                if (!_orderRowsFiles.Contains(orderRowsFileName)) _orderRowsFiles.Add(orderRowsFileName);
            }

            if (_SOOutput.WriteSales)
            {
                var parquetSales = _memoryBufferSales.Select(sale => ParquetSale.FromSale(sale));
                string salesFileName = $"{Consts.SALES}_{_fileCounter}.parquet";
                await ParquetSerializer.SerializeAsync(parquetSales, Path.Combine(_outputFolder, salesFileName), parOpt);
                if (!_salesFiles.Contains(salesFileName)) _salesFiles.Add(salesFileName);
            }

            // Next flushes go to the same files until a new file is started
            _appendToCurrentFile = true;
        }

    }

}
Sales
Sales
Customers
Customers
Dates
Dates
CurrencyExchanges
CurrencyExchanges
Products
Products
Stores
Stores
Text is not SVG - cannot display
namespace DatabaseGenerator.DataWriter
{

    /// <summary>
    /// <see cref="IDataWriter"/> that writes the generated data as CSV files.
    /// </summary>
    /// <remarks>
    /// When <c>CsvMaxOrdersPerFile</c> is greater than zero the orders / order rows / sales
    /// output is split into numbered files which, on <see cref="Close"/>, are moved into one
    /// sub-folder per table and optionally gzip-compressed. Otherwise a single file per table
    /// is written directly in the output folder.
    /// </remarks>
    public class CSVWriter : IDataWriter
    {

        // ISO date format used for every date written by this class
        private const string ISO_DATE_FORMAT = "yyyy-MM-dd";

        // config
        private string _outputFolder;
        private bool _gzCompression;
        private int? _maxOrdersPerFile;         // null = single file, no split
        private SOOutput _SOOutput;

        // running
        private int _filesCounter;              // index of the current file
        private int _currentFileRowsCounter;    // number of orders written to the current file
        private List<string> _ordersFileNames;
        private List<string> _orderRowsFileNames;
        private List<string> _salesFileNames;
        private CultureInfo _outputCultureInfo;
        private bool _fileOpen;
        private StreamWriter _orderFileStream;
        private StreamWriter _orderRowsFileStream;
        private StreamWriter _salesFileStream;



        /// <summary>Reads the writer settings from the configuration.</summary>
        /// <param name="config">Generator configuration.</param>
        /// <param name="outputFolder">Folder that receives every generated file.</param>
        public CSVWriter(Config config, string outputFolder)
        {
            _SOOutput = config.SalesOrdersOut;

            _maxOrdersPerFile = config.CsvMaxOrdersPerFile;
            if (_maxOrdersPerFile < 1) _maxOrdersPerFile = null;

            _gzCompression = config.CsvGzCompression == 1;
            _outputFolder = outputFolder;
            _outputCultureInfo = new CultureInfo(String.Empty);             // Creates an InvariantCulture that can be modified
            _outputCultureInfo.NumberFormat.NumberDecimalSeparator = ".";   // ...to be sure :)
        }


        /// <summary>Resets the running state; must be called before the first order is written.</summary>
        public void Init()
        {
            _filesCounter = 0;
            _currentFileRowsCounter = 0;
            _fileOpen = false;
            _ordersFileNames = new List<string>();
            _orderRowsFileNames = new List<string>();
            _salesFileNames = new List<string>();
        }


        /// <summary>
        /// Appends one order, its rows and its sales to the current CSV files, opening new
        /// files (with header line) when needed and closing them when the per-file limit is reached.
        /// </summary>
        /// <param name="order">Order to write; its rows are read from <c>order.Rows</c>.</param>
        /// <param name="sales">Sales rows of the order; enumerated only when sales output is enabled.</param>
        public async Task WriteOrderWithRows(Order order, IEnumerable<Sale> sales)
        {
            await Task.CompletedTask;

            // Create output file
            if (!_fileOpen)
            {
                _fileOpen = true;
                OpenOutputFiles();
            }

            // Write data to csv files
            if (_SOOutput.WriteOrders)
            {
                _orderFileStream.WriteLine(String.Join(",", DumpOrders_DataField(order)));
                foreach (var orderRow in order.Rows)
                {
                    _orderRowsFileStream.WriteLine(String.Join(",", DumpOrderRows_DataFields(order, orderRow, _outputCultureInfo)));
                }
            }
            if (_SOOutput.WriteSales)
            {
                foreach (var sale in sales)
                {
                    _salesFileStream.WriteLine(String.Join(",", DumpSale_DataField(sale, _outputCultureInfo)));
                }
            }

            // new file?
            _currentFileRowsCounter++;
            if (_maxOrdersPerFile.HasValue && _currentFileRowsCounter >= _maxOrdersPerFile)
            {
                CloseOutputFiles();

                _filesCounter++;
                _currentFileRowsCounter = 0;
                _fileOpen = false;
            }
        }


        /// <summary>
        /// Opens the enabled output files for the current file index, records their names
        /// and writes the header lines.
        /// </summary>
        private void OpenOutputFiles()
        {
            if (_SOOutput.WriteOrders)
            {
                string ordersFileName = GetOutputFileName(Consts.ORDERS);
                string orderRowsFileName = GetOutputFileName(Consts.ORDERROWS);
                _ordersFileNames.Add(ordersFileName);
                _orderRowsFileNames.Add(orderRowsFileName);
                _orderFileStream = new StreamWriter(Path.Combine(_outputFolder, ordersFileName));
                _orderRowsFileStream = new StreamWriter(Path.Combine(_outputFolder, orderRowsFileName));
                _orderFileStream.WriteLine(String.Join(",", DumpOrders_Headers()));
                _orderRowsFileStream.WriteLine(String.Join(",", DumpOrderRows_Headers()));
            }

            if (_SOOutput.WriteSales)
            {
                string salesFileName = GetOutputFileName(Consts.SALES);
                _salesFileNames.Add(salesFileName);
                _salesFileStream = new StreamWriter(Path.Combine(_outputFolder, salesFileName));
                _salesFileStream.WriteLine(String.Join(",", DumpSales_Headers()));
            }
        }


        /// <summary>Returns "table_N.csv" when the output is split in several files, "table.csv" otherwise.</summary>
        private string GetOutputFileName(string tableName)
        {
            return _maxOrdersPerFile.HasValue ? $"{tableName}_{_filesCounter}.csv" : $"{tableName}.csv";
        }


        /// <summary>Closes the currently open output files, if any, and forgets them.</summary>
        private void CloseOutputFiles()
        {
            _orderFileStream?.Close();
            _orderRowsFileStream?.Close();
            _salesFileStream?.Close();
            _orderFileStream = null;
            _orderRowsFileStream = null;
            _salesFileStream = null;
        }


        /// <summary>Writes one CSV file per static table into the output folder.</summary>
        public async Task WriteStaticData(IEnumerable<Customer> customers,
                                          IEnumerable<Store> stores,
                                          IEnumerable<Product> products,
                                          IEnumerable<DateExtended> dates,
                                          IEnumerable<CurrencyExchange> currencyExchanges)
        {
            Logger.Info("stores csv");
            using (var csv = new CsvWriter(new StreamWriter(Path.Combine(_outputFolder, $"{Consts.STORES}.csv")), CultureInfo.InvariantCulture))
            {
                CsvWriterSetISODate(csv);
                await csv.WriteRecordsAsync(stores.Select(s => CsvStore.GetFromStore(s)));
            }

            Logger.Info("products csv");
            using (var csv = new CsvWriter(new StreamWriter(Path.Combine(_outputFolder, $"{Consts.PRODUCTS}.csv")), CultureInfo.InvariantCulture))
            {
                await csv.WriteRecordsAsync(products.Select(p => CsvProduct.GetFromProduct(p)));
            }

            Logger.Info("dates csv");
            using (var csv = new CsvWriter(new StreamWriter(Path.Combine(_outputFolder, $"{Consts.DATES}.csv")), CultureInfo.InvariantCulture))
            {
                CsvWriterSetISODate(csv);
                await csv.WriteRecordsAsync(dates.Select(d => CsvDateExtended.FromDateExtended(d)));
            }

            Logger.Info("currency-exchanges csv");
            using (var csv = new CsvWriter(new StreamWriter(Path.Combine(_outputFolder, $"{Consts.CURREXCHS}.csv")), CultureInfo.InvariantCulture))
            {
                CsvWriterSetISODate(csv);
                await csv.WriteRecordsAsync(currencyExchanges.Select(ce => CsvCurrencyExchange.FromCurrencyExchange(ce)));
            }

            Logger.Info("customers csv");
            using (var csv = new CsvWriter(new StreamWriter(Path.Combine(_outputFolder, $"{Consts.CUSTOMERS}.csv")), CultureInfo.InvariantCulture))
            {
                CsvWriterSetISODate(csv);
                await csv.WriteRecordsAsync(customers.Select(c => CsvCustomer.FromCustomer(c)).OrderBy(x => x.CustomerKey));
            }
        }


        /// <summary>Makes the writer format dates (nullable or not) as "yyyy-MM-dd".</summary>
        private void CsvWriterSetISODate(CsvWriter csv)
        {
            var tcoDT = new TypeConverterOptions() { Formats = new[] { ISO_DATE_FORMAT } };
            csv.Context.TypeConverterOptionsCache.AddOptions<DateTime>(tcoDT);
            csv.Context.TypeConverterOptionsCache.AddOptions<DateTime?>(tcoDT);
        }


        /// <summary>
        /// Closes the open files and, when the output is split in several files, moves them
        /// into one sub-folder per table (compressing them if requested).
        /// </summary>
        public void Close()
        {
            if (_fileOpen)
            {
                CloseOutputFiles();
                _fileOpen = false;
            }

            if (_maxOrdersPerFile.HasValue)
            {
                Logger.Info($"Move csv files to subfoldes");

                if (_SOOutput.WriteOrders)
                {
                    string ordersFolder = Path.Combine(_outputFolder, Consts.ORDERS);
                    string orderRowsFolder = Path.Combine(_outputFolder, Consts.ORDERROWS);
                    Directory.CreateDirectory(ordersFolder);
                    Directory.CreateDirectory(orderRowsFolder);
                    _ordersFileNames.ForEach(fn => MoveAndGZ(fn, ordersFolder));
                    _orderRowsFileNames.ForEach(fn => MoveAndGZ(fn, orderRowsFolder));
                }

                if (_SOOutput.WriteSales)
                {
                    string salesFolder = Path.Combine(_outputFolder, Consts.SALES);
                    Directory.CreateDirectory(salesFolder);
                    _salesFileNames.ForEach(fn => MoveAndGZ(fn, salesFolder));
                }
            }
        }


        /// <summary>
        /// Moves a file from the output folder to <paramref name="destFolder"/> and, when
        /// compression is enabled, replaces it with "fileName.gz".
        /// </summary>
        private void MoveAndGZ(string fileName, string destFolder)
        {
            string srcFile = Path.Combine(_outputFolder, fileName);
            string dstFile = Path.Combine(destFolder, fileName);
            string gzFile = Path.Combine(destFolder, fileName + ".gz");

            File.Move(srcFile, dstFile);

            if (_gzCompression)
            {
                GzCompress(dstFile, gzFile, true);
            }
        }


        /// <summary>Gzip-compresses a file, optionally deleting the original afterwards.</summary>
        /// <param name="inFileName">File to compress.</param>
        /// <param name="gzFileName">Compressed file to create; an existing file is overwritten.</param>
        /// <param name="deleteOriginal">True to delete <paramref name="inFileName"/> once compressed.</param>
        private void GzCompress(string inFileName, string gzFileName, bool deleteOriginal)
        {
            using (var inStream = File.OpenRead(inFileName))
            // File.Create truncates an existing file; File.OpenWrite would leave the tail
            // of a longer, older file after the new content and produce a corrupt archive
            using (var outStream = File.Create(gzFileName))
            using (var gzip = new GZipStream(outStream, CompressionMode.Compress, false))
            {
                inStream.CopyTo(gzip);
            }

            if (deleteOriginal) File.Delete(inFileName);
        }


        /// <summary>Column names of the orders file.</summary>
        public string[] DumpOrders_Headers()
        {
            return new string[]
            {
                "OrderKey",
                "CustomerKey",
                "StoreKey",
                "OrderDate",
                "DeliveryDate",
                "CurrencyCode"
            };
        }


        /// <summary>Field values of one order, in the order of <see cref="DumpOrders_Headers"/>.</summary>
        public string[] DumpOrders_DataField(Order order)
        {
            // Dates are formatted with the invariant culture: the current culture
            // could use a non-Gregorian calendar and change the year that is written
            return new string[]
            {
                order.OrderID.ToString(),
                order.CustomerID.ToString(),
                order.StoreID.ToString(),
                order.DT.ToString(ISO_DATE_FORMAT, CultureInfo.InvariantCulture),
                order.DeliveryDate.ToString(ISO_DATE_FORMAT, CultureInfo.InvariantCulture),
                order.CurrencyCode
            };
        }


        /// <summary>Column names of the order rows file.</summary>
        private string[] DumpOrderRows_Headers()
        {
            return new string[]
            {
                "OrderKey",
                "LineNumber",
                "ProductKey",
                "Quantity",
                "UnitPrice",
                "NetPrice",
                "UnitCost"
            };
        }


        /// <summary>Field values of one order row, in the order of <see cref="DumpOrderRows_Headers"/>.</summary>
        /// <param name="order">Order that owns the row (provides the order key).</param>
        /// <param name="orderRow">Row to dump.</param>
        /// <param name="ci">Culture used to format the price and cost values.</param>
        private string[] DumpOrderRows_DataFields(Order order, OrderRow orderRow, CultureInfo ci)
        {
            return new string[]
            {
                order.OrderID.ToString(),
                orderRow.RowNumber.ToString(),
                orderRow.ProductID.ToString(),
                orderRow.Quantity.ToString(),
                orderRow.UnitPrice.ToString(ci),
                orderRow.NetPrice.ToString(ci),
                orderRow.UnitCost.ToString(ci)
            };
        }

        /// <summary>Column names of the sales file.</summary>
        public string[] DumpSales_Headers()
        {
            return new string[]
            {
                "OrderKey",
                "LineNumber",
                "OrderDate",
                "DeliveryDate",
                "CustomerKey",
                "StoreKey",
                "ProductKey",
                "Quantity",
                "UnitPrice",
                "NetPrice",
                "UnitCost",
                "CurrencyCode",
                "ExchangeRate"
            };
        }

        /// <summary>Field values of one sale, in the order of <see cref="DumpSales_Headers"/>.</summary>
        /// <param name="sale">Sale to dump.</param>
        /// <param name="ci">Culture used to format the price, cost and exchange rate values.</param>
        public string[] DumpSale_DataField(Sale sale, CultureInfo ci)
        {
            // Dates always use the invariant culture (see DumpOrders_DataField)
            return new string[]
            {
                sale.OrderKey.ToString(),
                sale.LineNumber.ToString(),
                sale.OrderDate.ToString(ISO_DATE_FORMAT, CultureInfo.InvariantCulture),
                sale.DeliveryDate.ToString(ISO_DATE_FORMAT, CultureInfo.InvariantCulture),
                sale.CustomerKey.ToString(),
                sale.StoreKey.ToString(),
                sale.ProductKey.ToString(),
                sale.Quantity.ToString(),
                sale.UnitPrice.ToString(ci),
                sale.NetPrice.ToString(ci),
                sale.UnitCost.ToString(ci),
                sale.CurrencyCode,
                sale.ExchangeRate.ToString(ci)
            };
        }

    }

}
Orders
Orders
Customers
Customers
Products
Products
Stores
Stores
OrderRows
OrderRows
Dates
Dates
CurrencyExchanges
CurrencyExchanges
Text is not SVG - cannot display
--------------------------------------------------------------------------------