├── README.md ├── createaggregateswithcetas ├── createaggregateswithcetas.sql └── triggeraggregatescetassp ├── createview ├── DataLakeMetaData.csv ├── README.txt ├── spGenerateViews.sql ├── spTriggerGenerateViews.sql └── vwViewMetadata.sql ├── createviewsdynamically ├── dataprocessed.sql ├── dependencies └── ObjectDependencies.sql ├── externaltablemetadata.sql ├── querystoragelogs ├── sqlcreatequeryviewforstoragelogs └── sqlquerystoragelogs.sql └── showfileerrors.sql /README.md: -------------------------------------------------------------------------------- 1 | # serverlesssqlpooltools (sspt) 2 | SQL scripts for Azure Synapse Analytics Serverless SQL Pools 3 | 4 | These are utility scripts for various operations in Azure Synapse Analytics Serverless SQL Pools. 5 | 6 | --- 7 | 8 | ## createaggregateswithcetas 9 | This is a dynamic CREATE EXTERNAL TABLE AS SELECT process to write data out to an Azure Data Lake Gen2 account. This is the code that supports blog post https://www.serverlesssql.com/optimisation/using-cetas-to-create-pre-calculated-datasets/ 10 |

11 | 12 | ## createview 13 | This is a dynamic CREATE VIEW process which loads an external metadata CSV file then iterates and creates the appropriate views 14 |

15 | 16 | ## querystoragelogs 17 | This is code to create a Serverless SQL Pools LogAnalytics database and then a View over the Azure Storage log format. Supports blog post https://www.serverlesssql.com/optimisation/use-azure-storage-logs-to-analyse-synapse-analytics-serverless-sql-pools-activity/ 18 |

19 | 20 | ## createviewsdynamically.sql 21 | Basic SQL statement which can be used to construct a CREATE VIEW statement dynamically using file metadata extraction from sp_describe_first_result_set. 22 |

23 | 24 | ## dataprocessed.sql 25 | Shows the data processed metric vs the daily/weekly/monthly TB (terabytes) limits set. 26 |

27 | 28 | ## externaltablemetadata.sql 29 | Shows the metadata attached to an External Table such as file formats and data source. 30 |

31 | 32 | ## showerrorfiles.sql 33 | Creates a view to show errors logged as part of the OPENROWSET ERRORFILE_LOCATION and ERRORFILE_DATA_SOURCE error handling process. 34 |

35 | 36 | ## Create Aggregate Datasets using CETAS 37 | In the folder **createaggregateswithcetas** is a stored procedure that when passed a View name, will select * from the view and write back to a new folder in the Data Lake. It also creates a new view over this data and only selects the current data. 38 | -------------------------------------------------------------------------------- /createaggregateswithcetas/createaggregateswithcetas.sql: -------------------------------------------------------------------------------- 1 | CREATE PROCEDURE LDW.GeneratePreComputedDatasets 2 | @SourceView NVARCHAR(200), 3 | @Location NVARCHAR(1000), 4 | @DataSource NVARCHAR(100), 5 | @FileFormat NVARCHAR(100) 6 | AS 7 | 8 | BEGIN 9 | 10 | --declare variables,set the process date for the folder name, and set the locations 11 | DECLARE @LocationFull NVARCHAR(1100), 12 | @LocationTopLevel NVARCHAR(1100), 13 | @ProcessDate NCHAR(16), 14 | @SQLDrop NVARCHAR(2000), 15 | @CreateExternalTableString NVARCHAR(2000), 16 | @CreateCurrentAggregateView NVARCHAR(2000) 17 | 18 | SET @ProcessDate = FORMAT(GETDATE(),'yyyyMMddHHmmss') 19 | 20 | SET @LocationFull = CONCAT(@Location,REPLACE(@SourceView,'.',''),'/',@ProcessDate) 21 | SET @LocationTopLevel = CONCAT(@Location,REPLACE(@SourceView,'.','')) 22 | 23 | --Check for existence of an external table and drop if found 24 | SET @SQLDrop = 'IF OBJECT_ID(''' + REPLACE(@SourceView,'.vw','') + '_PreComputeTable'') IS NOT NULL BEGIN DROP EXTERNAL TABLE ' + REPLACE(@SourceView,'.vw','') + '_PreComputeTable END' 25 | 26 | EXEC sp_executesql @SQLDrop 27 | 28 | --generate the SQL script to create the external table and export the View data 29 | SET @CreateExternalTableString = 30 | 'CREATE EXTERNAL TABLE ' + REPLACE(@SourceView,'.vw','') + '_PreComputeTable 31 | WITH 32 | ( 33 | LOCATION = ''' + @LocationFull + ''', 34 | DATA_SOURCE = ' + @DataSource + ', 35 | FILE_FORMAT = ' + @FileFormat + ' 36 | ) 37 | AS 38 | SELECT 39 | * 40 | FROM ' + @SourceView 41 | 42 | EXEC sp_executesql @CreateExternalTableString 43 | 44 | 45 | --drop the external table as we do not need it, it is only being used to generate the data. We'll use a View to select the data 46 | SET @SQLDrop = 47 | 'IF OBJECT_ID(''' + REPLACE(@SourceView,'.vw','') + '_PreComputeTable'') IS NOT NULL BEGIN DROP EXTERNAL TABLE ' + REPLACE(@SourceView,'.vw','') + '_PreComputeTable END' 48 | 49 | EXEC sp_executesql @SQLDrop 50 | 51 | 52 | --drop the existing precompute View, we will re-create it 53 | SET @SQLDrop = 54 | 'IF OBJECT_ID(''' + @SourceView + '_PreComputeCurrent'') IS NOT NULL BEGIN DROP VIEW ' + @SourceView + '_PreComputeCurrent END' 55 | 56 | EXEC sp_executesql @SQLDrop 57 | 58 | --create a view to show the current data 59 | SET @CreateCurrentAggregateView = 60 | 'CREATE VIEW ' + @SourceView + '_PreComputeCurrent 61 | AS 62 | WITH CurrentFolder 63 | AS 64 | ( 65 | SELECT MAX(fct.filepath(1)) AS CurrentAggregates 66 | FROM 67 | OPENROWSET 68 | ( 69 | BULK ''' + @LocationTopLevel + '/*/*.parquet'', 70 | DATA_SOURCE = ''' + @DataSource + ''', 71 | FORMAT = ''Parquet'' 72 | ) AS fct 73 | ) 74 | SELECT fct.filepath(1) AS CurrentAggregateFolder, 75 | * 76 | FROM 77 | OPENROWSET 78 | ( 79 | BULK ''' + @LocationTopLevel + '/*/*.parquet'', 80 | DATA_SOURCE = ''' + @DataSource + ''', 81 | FORMAT = ''Parquet'' 82 | ) AS fct 83 | WHERE fct.filepath(1) IN (SELECT CurrentAggregates FROM CurrentFolder)' 84 | 85 | EXEC sp_executesql @CreateCurrentAggregateView 86 | 87 | END; 88 | -------------------------------------------------------------------------------- /createaggregateswithcetas/triggeraggregatescetassp: -------------------------------------------------------------------------------- 1 | EXEC LDW.GeneratePreComputedDatasets 2 | @SourceView = 'LDW.vwFactWebTelemetryAggregate', 3 | @Location = 'precomputeddatasets/dataminutes2/', 4 | @DataSource = 'ExternalDataSourceDataLakeMI', 5 | @FileFormat = 'SynapseParquetFormat' 6 | -------------------------------------------------------------------------------- /createview/DataLakeMetaData.csv: -------------------------------------------------------------------------------- 1 | FileFormat|HeaderRow|FieldTerminator|ViewName|ExternalDataSourceDataLake|Location|FolderHierarchyDepth|MaxVarcharValue 2 | Parquet|||LDW.vwCustomer|ExternalDataSourceDataLake|webconformed/cleansed/customer/*/*.parquet|1|255 3 | Parquet|||LDW.vwProductCategory|ExternalDataSourceDataLake|webconformed/cleansed/productcategory/*/*.parquet|1|255 4 | Parquet|||LDW.vwProductModel|ExternalDataSourceDataLake|webconformed/cleansed/productmodel/*/*.parquet|1|255 5 | Parquet|||LDW.vwProducts|ExternalDataSourceDataLake|webconformed/cleansed/products/*/*.parquet|1|255 6 | Parquet|||LDW.vwFactTelemetry|ExternalDataSourceDataLake|webvisitmessagesoptimised/EventYear=*/EventMonth=*/EventDateTime=*/*.parquet|3|50 7 | Parquet|||LDW.vwDimDate|ExternalDataSourceDataLake|conformed/dimensions/dimdate/*.parquet|0|50 -------------------------------------------------------------------------------- /createview/README.txt: -------------------------------------------------------------------------------- 1 | This process assumes that an existing Serverless SQL Pools database exists with data sources and file formats created. 2 | 3 | This process currently supports Parquet and Delimited files. 4 | 5 | To run the process: 6 | 7 | 1. Populate the DataLakeMetaData.csv metadata file with the relevant information and upload to a folder in an Azure Data lake Gen2 container 8 | 2. Run the vwViewMetadata SQL script to create the View. Ensure the BULK and DATA_SOURCE reference the relevant location and data source for the metadata file 9 | 3. Run the spGenerateViews SQL script to create the stored procedure 10 | 4. Run the spTriggerGenerateViews SQL script to create the stored procedure 11 | 5. Execute the spTriggerGenerateViews stored procedure to generate the views 12 | -------------------------------------------------------------------------------- /createview/spGenerateViews.sql: -------------------------------------------------------------------------------- 1 | CREATE PROC dbo.spGenerateViews 2 | @FileFormat NVARCHAR(50), 3 | @HeaderRow NVARCHAR(5), 4 | @FieldTerminator NCHAR(1), 5 | @ViewName NCHAR(50), 6 | @ExternalDataSourceDataLake NVARCHAR(255), 7 | @Location NVARCHAR(255), 8 | @FolderHierarchyDepth TINYINT, 9 | @DropView BIT, 10 | @MaxVarcharValue SMALLINT 11 | AS 12 | BEGIN 13 | 14 | --System variables 15 | DECLARE @FileFormatString NVARCHAR(255), 16 | @SchemaSQL NVARCHAR(MAX); 17 | 18 | ------------------------------------ 19 | IF @FileFormat = 'Parquet' 20 | BEGIN 21 | SET @FileFormatString = 'FORMAT = ''Parquet''' 22 | END 23 | ELSE 24 | BEGIN 25 | 26 | SET @FileFormatString = ' 27 | FORMAT = '''+ @FileFormat + ''', 28 | PARSER_VERSION = ''2.0'', 29 | HEADER_ROW = ' + @HeaderRow + ', 30 | FIELDTERMINATOR ='''+ @FieldTerminator + '''' 31 | END 32 | 33 | CREATE TABLE #t ( 34 | is_hidden bit NOT NULL, 35 | column_ordinal int NOT NULL, 36 | name sysname NULL, 37 | is_nullable bit NOT NULL, 38 | system_type_id int NOT NULL, 39 | system_type_name nvarchar(256) NULL, 40 | max_length smallint NOT NULL, 41 | [precision] tinyint NOT NULL, 42 | scale tinyint NOT NULL, 43 | collation_name sysname NULL, 44 | user_type_id int NULL, 45 | user_type_database sysname NULL, 46 | user_type_schema sysname NULL, 47 | user_type_name sysname NULL, 48 | assembly_qualified_type_name nvarchar(4000), 49 | xml_collection_id int NULL, 50 | xml_collection_database sysname NULL, 51 | xml_collection_schema sysname NULL, 52 | xml_collection_name sysname NULL, 53 | is_xml_document bit NOT NULL, 54 | is_case_sensitive bit NOT NULL, 55 | is_fixed_length_clr_type bit NOT NULL, 56 | source_server nvarchar(128), 57 | source_database nvarchar(128), 58 | source_schema nvarchar(128), 59 | source_table nvarchar(128), 60 | source_column nvarchar(128), 61 | is_identity_column bit NULL, 62 | is_part_of_unique_key bit NULL, 63 | is_updateable bit NULL, 64 | is_computed_column bit NULL, 65 | is_sparse_column_set bit NULL, 66 | ordinal_in_order_by_list smallint NULL, 67 | order_by_list_length smallint NULL, 68 | order_by_is_descending smallint NULL, 69 | tds_type_id int NOT NULL, 70 | tds_length int NOT NULL, 71 | tds_collation_id int NULL, 72 | tds_collation_sort_id tinyint NULL 73 | ) 74 | 75 | SET @SchemaSQL = CAST(' 76 | SELECT * FROM 77 | OPENROWSET 78 | ( 79 | BULK ''' + CAST(@Location AS NVARCHAR(MAX)) + ''', 80 | DATA_SOURCE = ''' + CAST(@ExternalDataSourceDataLake AS NVARCHAR(MAX)) + ''',' 81 | + CAST(@FileFormatString AS NVARCHAR(MAX)) + ' 82 | 83 | ) AS fct' AS NVARCHAR(MAX)) 84 | 85 | INSERT INTO #t EXEC sp_describe_first_result_set @tsql = @SchemaSQL 86 | 87 | DECLARE @mincol INT, 88 | @maxcol INT, 89 | @sqltext NVARCHAR(4000) 90 | 91 | SELECT @mincol = MIN(column_ordinal) FROM #t 92 | SELECT @maxcol = MAX(column_ordinal) FROM #t 93 | 94 | set @sqltext = 'CREATE VIEW ' + @ViewName + ' AS SELECT ' 95 | 96 | WHILE @mincol <= @maxcol 97 | BEGIN 98 | SELECT @sqltext = @sqltext + CONCAT('CAST(' 99 | ,[name] 100 | ,' AS ' 101 | ,CASE WHEN system_type_name LIKE 'varchar%' THEN 'varchar(' + CAST(@MaxVarcharValue AS NVARCHAR(5)) + ')' ELSE system_type_name END + ') AS ' 102 | ,[name] 103 | ,CASE WHEN @mincol = @maxcol THEN '' ELSE ',' END) 104 | FROM #t 105 | WHERE column_ordinal = @mincol 106 | 107 | SET @mincol = @mincol +1 108 | END 109 | 110 | DECLARE @Loop INT = 1, 111 | @sqlhierarchytext NVARCHAR(500) = '' 112 | 113 | WHILE @Loop <= @FolderHierarchyDepth 114 | BEGIN 115 | SET @sqlhierarchytext = @sqlhierarchytext + ', fct.filepath('+ CAST(@Loop AS VARCHAR(10)) + ') AS FilePath' + CAST(@Loop AS VARCHAR(10)) 116 | 117 | SET @Loop = @Loop + 1 118 | END 119 | 120 | SET @sqltext = @sqltext + @sqlhierarchytext + ' FROM 121 | OPENROWSET 122 | ( 123 | BULK ''' + @Location + ''', 124 | DATA_SOURCE = ''' + @ExternalDataSourceDataLake + ''',' 125 | + @FileFormatString + ' 126 | 127 | ) AS fct' 128 | 129 | --DROP VIEW 130 | IF @DropView = 1 131 | BEGIN 132 | 133 | IF OBJECT_ID (@ViewName, N'V') IS NOT NULL 134 | BEGIN 135 | DECLARE @DropSQL NVARCHAR(255) 136 | 137 | SET @DropSQL = 'DROP VIEW ' + @ViewName 138 | 139 | EXEC sp_executesql @tsl = @DropSQL 140 | END 141 | END 142 | 143 | --CREATE VIEW 144 | EXEC sp_executesql @tsl = @sqltext 145 | 146 | END 147 | 148 | GO 149 | -------------------------------------------------------------------------------- /createview/spTriggerGenerateViews.sql: -------------------------------------------------------------------------------- 1 | CREATE PROC dbo.spTriggerGenerateViews @DropView BIT 2 | AS 3 | BEGIN 4 | 5 | DECLARE @LoopCountMax INT = (SELECT COUNT(*) FROM LDW.vwViewMetadata), 6 | @LoopCount INT = 1, 7 | @ExternalDataSourceDataLake NVARCHAR(255), 8 | @FileFormat NVARCHAR(50), 9 | @Location NVARCHAR(255), 10 | @HeaderRow NVARCHAR(5), 11 | @FieldTerminator NCHAR(1), 12 | @ViewName NCHAR(50), 13 | @FolderHierarchyDepth TINYINT, 14 | @MaxVarcharValue SMALLINT 15 | 16 | WHILE @LoopCount <= @LoopCountMax 17 | BEGIN 18 | 19 | SELECT @FileFormat = FileFormat, 20 | @HeaderRow = ISNULL(HeaderRow,''), 21 | @FieldTerminator = ISNULL([FieldTerminator],''), 22 | @ViewName = ViewName, 23 | @ExternalDataSourceDataLake = ExternalDataSourceDataLake, 24 | @Location = [Location], 25 | @FolderHierarchyDepth = FolderHierarchyDepth, 26 | @MaxVarcharValue = MaxVarcharValue 27 | FROM LDW.vwViewMetadata 28 | WHERE ViewCreationOrder = @LoopCount 29 | 30 | EXEC LDW.GenerateViews 31 | @FileFormat, 32 | @HeaderRow, 33 | @FieldTerminator, 34 | @ViewName, 35 | @ExternalDataSourceDataLake, 36 | @Location, 37 | @FolderHierarchyDepth, 38 | @DropView, 39 | @MaxVarcharValue 40 | 41 | SET @LoopCount = @LoopCount + 1 42 | 43 | END 44 | 45 | END 46 | -------------------------------------------------------------------------------- /createview/vwViewMetadata.sql: -------------------------------------------------------------------------------- 1 | CREATE VIEW dbo.vwViewMetadata 2 | AS 3 | SELECT 4 | ROW_NUMBER() OVER (ORDER BY ViewName) AS ViewCreationOrder, 5 | FileFormat, 6 | HeaderRow, 7 | [FieldTerminator], 8 | ViewName, 9 | ExternalDataSourceDataLake, 10 | [Location], 11 | FolderHierarchyDepth, 12 | MaxVarcharValue 13 | FROM 14 | OPENROWSET 15 | ( 16 | BULK 'metadata/DataLakeMetaData.csv', 17 | DATA_SOURCE = 'ExternalDataSourceDataLake', 18 | FORMAT = 'CSV', 19 | PARSER_VERSION = '2.0', 20 | HEADER_ROW = TRUE, 21 | FIELDTERMINATOR ='|' 22 | ) rwt 23 | -------------------------------------------------------------------------------- /createviewsdynamically: -------------------------------------------------------------------------------- 1 | ----05. 2 | CREATE TABLE #t ( 3 | is_hidden bit NOT NULL, 4 | column_ordinal int NOT NULL, 5 | name sysname NULL, 6 | is_nullable bit NOT NULL, 7 | system_type_id int NOT NULL, 8 | system_type_name nvarchar(256) NULL, 9 | max_length smallint NOT NULL, 10 | precision tinyint NOT NULL, 11 | scale tinyint NOT NULL, 12 | collation_name sysname NULL, 13 | user_type_id int NULL, 14 | user_type_database sysname NULL, 15 | user_type_schema sysname NULL, 16 | user_type_name sysname NULL, 17 | assembly_qualified_type_name nvarchar(4000), 18 | xml_collection_id int NULL, 19 | xml_collection_database sysname NULL, 20 | xml_collection_schema sysname NULL, 21 | xml_collection_name sysname NULL, 22 | is_xml_document bit NOT NULL, 23 | is_case_sensitive bit NOT NULL, 24 | is_fixed_length_clr_type bit NOT NULL, 25 | source_server nvarchar(128), 26 | source_database nvarchar(128), 27 | source_schema nvarchar(128), 28 | source_table nvarchar(128), 29 | source_column nvarchar(128), 30 | is_identity_column bit NULL, 31 | is_part_of_unique_key bit NULL, 32 | is_updateable bit NULL, 33 | is_computed_column bit NULL, 34 | is_sparse_column_set bit NULL, 35 | ordinal_in_order_by_list smallint NULL, 36 | order_by_list_length smallint NULL, 37 | order_by_is_descending smallint NULL, 38 | tds_type_id int NOT NULL, 39 | tds_length int NOT NULL, 40 | tds_collation_id int NULL, 41 | tds_collation_sort_id tinyint NULL 42 | ) 43 | GO 44 | 45 | INSERT INTO #t EXEC sp_describe_first_result_set N' 46 | SELECT * FROM 47 | OPENROWSET 48 | ( 49 | BULK ''sourcedatasystem/Sales_Customers/*.csv'', 50 | DATA_SOURCE = ''ExternalDataSourceDataLake'', 51 | FORMAT = ''CSV'', 52 | PARSER_VERSION = ''2.0'', 53 | HEADER_ROW = TRUE, 54 | FIELDTERMINATOR =''|'' 55 | ) AS fct' 56 | 57 | DECLARE @mincol INT, 58 | @maxcol INT, 59 | @sqltext NVARCHAR(4000) 60 | 61 | SELECT @mincol = MIN(column_ordinal) FROM #t 62 | SELECT @maxcol = MAX(column_ordinal) FROM #t 63 | 64 | set @sqltext = 'CREATE VIEW LDW.vwSalesV2' + CHAR(10) + CHAR(13) + 'AS' + CHAR(10) + CHAR(13) + 'SELECT ' 65 | 66 | WHILE @mincol <= @maxcol 67 | BEGIN 68 | SELECT @sqltext = @sqltext + CONCAT('CAST(',[name],' AS ',system_type_name,') AS ',[name],CASE WHEN @mincol = @maxcol THEN '' ELSE ',' END) 69 | FROM #t 70 | WHERE column_ordinal = @mincol 71 | 72 | SET @mincol = @mincol +1 73 | END 74 | 75 | SET @sqltext = @sqltext + ' FROM 76 | OPENROWSET 77 | ( 78 | BULK ''sourcedatasystem/Sales_Customers/*.csv'', 79 | DATA_SOURCE = ''ExternalDataSourceDataLake'', 80 | FORMAT = ''CSV'', 81 | PARSER_VERSION = ''2.0'', 82 | HEADER_ROW = TRUE, 83 | FIELDTERMINATOR =''|'' 84 | ) AS fct' 85 | 86 | EXEC sp_executesql @tsl = @sqltext 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /dataprocessed.sql: -------------------------------------------------------------------------------- 1 | ;WITH DataUsage 2 | AS 3 | ( 4 | SELECT [type] AS DataUsageWindow, 5 | data_processed_mb AS DataProcessedMB, 6 | CAST(data_processed_mb AS DECIMAL(10,3)) / 1000 AS DataProcessedGB, 7 | (CAST(data_processed_mb AS DECIMAL(10,3)) / 1000) / 1000 AS DataProcessedTB 8 | FROM sys.dm_external_data_processed 9 | ), 10 | DataLimit 11 | AS 12 | ( 13 | SELECT [name] AS DataLimitWindow, 14 | CASE 15 | WHEN [name] LIKE '%daily%' THEN 'daily' 16 | WHEN [name] LIKE '%weekly%' THEN 'weekly' 17 | WHEN [name] LIKE '%monthly%' THEN 'monthly' 18 | END AS DataUsageWindow, 19 | value AS TBValue, 20 | CAST(value_in_use AS INT) AS TBValueInUse 21 | FROM sys.configurations 22 | WHERE [name] LIKE 'Data processed %' 23 | ) 24 | SELECT DL.DataUsageWindow, 25 | DL.TBValueInUse, 26 | DU.DataProcessedMB, 27 | DU.DataProcessedGB, 28 | DU.DataProcessedTB, 29 | (100 / DL.TBValueInUse) * DU.DataProcessedTB AS PercentTBUsed 30 | FROM DataLimit DL 31 | INNER JOIN DataUsage DU ON DL.DataUsageWindow = DU.DataUsageWindow -------------------------------------------------------------------------------- /dependencies/ObjectDependencies.sql: -------------------------------------------------------------------------------- 1 | --https://docs.microsoft.com/en-us/sql/relational-databases/views/get-information-about-a-view?view=sql-server-ver15--from 2 | 3 | SELECT OBJECT_NAME(referencing_id) AS referencing_entity_name, 4 | o.type_desc AS referencing_desciption, 5 | COALESCE(COL_NAME(referencing_id, referencing_minor_id), '(n/a)') AS referencing_minor_id, 6 | referencing_class_desc, referenced_class_desc, 7 | referenced_server_name, referenced_database_name, referenced_schema_name, 8 | referenced_entity_name, 9 | COALESCE(COL_NAME(referenced_id, referenced_minor_id), '(n/a)') AS referenced_column_name, 10 | is_caller_dependent, is_ambiguous 11 | FROM sys.sql_expression_dependencies AS sed 12 | INNER JOIN sys.objects AS o ON sed.referencing_id = o.object_id 13 | --WHERE referencing_id = OBJECT_ID(N'SCHEMA.OBJECTNAME'); 14 | -------------------------------------------------------------------------------- /externaltablemetadata.sql: -------------------------------------------------------------------------------- 1 | USE ; 2 | GO 3 | 4 | SELECT et.[name] AS TableName, 5 | et.[location] AS TableLocation, 6 | ef.[name] AS FileFormatName, 7 | ef.[format_type] AS FileFormatType, 8 | es.[name] AS DataSourceName, 9 | es.[location] AS DataSourceLocation 10 | FROM sys.external_tables et 11 | INNER JOIN sys.external_file_formats ef ON ef.file_format_id = et.file_format_id 12 | INNER JOIN sys.external_data_sources es ON es.data_source_id = et.data_source_id 13 | -------------------------------------------------------------------------------- /querystoragelogs/sqlcreatequeryviewforstoragelogs: -------------------------------------------------------------------------------- 1 | /* 2 | Attributes available for logging are at https://docs.microsoft.com/en-us/azure/storage/blobs/monitor-blob-storage-reference 3 | */ 4 | 5 | --create and configure new Serverless SQL Pools database 6 | CREATE DATABASE LogAnalysis 7 | 8 | USE LogAnalysis 9 | 10 | --encryption to allow authentication 11 | CREATE MASTER KEY ENCRYPTION BY PASSWORD = 'dsfads$%zdsfkjsdhlk456hvwegf'; 12 | 13 | --ensure Synapse workspace has been added as a Storage Blob Data Reader to general purpose storage account 14 | CREATE DATABASE SCOPED CREDENTIAL DataLakeManagedIdentity 15 | WITH IDENTITY='Managed Identity' 16 | 17 | --create data source to general purpose storage account 18 | --replace with relevant value 19 | CREATE EXTERNAL DATA SOURCE ExternalDataSourceDataLakeMI 20 | WITH ( 21 | LOCATION = 'https://.blob.core.windows.net/insights-logs-storageread', 22 | CREDENTIAL = DataLakeManagedIdentity 23 | ); 24 | 25 | --enable support for UTF8 26 | ALTER DATABASE LogAnalysis COLLATE Latin1_General_100_BIN2_UTF8; 27 | 28 | --create View over the storage logs 29 | CREATE OR ALTER VIEW dbo.vwAnalyseLogs 30 | AS 31 | SELECT 32 | time, 33 | resourceId, 34 | category, 35 | operationName, 36 | operationVersion, 37 | schemaVersion, 38 | statusCode, 39 | statusText, 40 | durationMs, 41 | callerIpAddress, 42 | correlationId, 43 | identity_type, 44 | identity_tokenHash, 45 | [location], 46 | identity_delegatedResource_resourceId, 47 | properties_accountName, 48 | properties_serviceType, 49 | properties_objectKey, 50 | properties_metricResponseType, 51 | properties_serverLatencyMs, 52 | properties_requestHeaderSize, 53 | properties_responseHeaderSize, 54 | properties_responseBodySize, 55 | properties_tlsVersion, 56 | uri, 57 | protocol, 58 | resourceType, 59 | jsonrows.filepath(1) as SubscriptionID, 60 | jsonrows.filepath(2) as ResourceGroup, 61 | jsonrows.filepath(3) as StorageAccount, 62 | jsonrows.filepath(4) as LogYear, 63 | jsonrows.filepath(5) as LogMonth, 64 | jsonrows.filepath(6) as LogDay, 65 | jsonrows.filepath(7) as LogHour, 66 | jsonrows.filepath(8) as LogMinute 67 | FROM OPENROWSET 68 | ( 69 | BULK '/resourceId=/subscriptions/*/resourceGroups/*/providers/Microsoft.Storage/storageAccounts/*/blobServices/default/y=*/m=*/d=*/h=*/m=*/*', 70 | DATA_SOURCE = 'ExternalDataSourceDataLakeMI', 71 | FORMAT = 'CSV', 72 |         PARSER_VERSION = '2.0', 73 | FIELDTERMINATOR = '0x09', 74 | FIELDQUOTE = '0x0b', 75 | ROWTERMINATOR = '0x0A' 76 | ) WITH (doc NVARCHAR(4000)) AS jsonrows 77 | CROSS APPLY OPENJSON (doc) 78 | WITH ( time DATETIME2 '$.time', 79 | resourceId VARCHAR(500) '$.resourceId', 80 | category VARCHAR(50) '$.category', 81 | operationName VARCHAR(100) '$.operationName', 82 | operationVersion VARCHAR(10) '$.operationVersion', 83 | schemaVersion VARCHAR(10) '$.schemaVersion', 84 | statusCode SMALLINT '$.statusCode', 85 | statusText VARCHAR(100) '$.statusText', 86 | durationMs INT '$.durationMs', 87 | callerIpAddress VARCHAR(50) '$.callerIpAddress', 88 | correlationId VARCHAR(50) '$.correlationId', 89 | identity_type VARCHAR(100) '$.identity.type', 90 | identity_tokenHash VARCHAR(100) '$.identity.tokenHash', 91 | [location] VARCHAR(50) '$.location', 92 | identity_delegatedResource_resourceId VARCHAR(500) '$.identity.delegatedResource.resourceId', 93 | properties_accountName VARCHAR(50) '$.properties.accountName', 94 | properties_serviceType VARCHAR(30) '$.properties.serviceType', 95 | properties_objectKey VARCHAR(250) '$.properties.objectKey', 96 | properties_metricResponseType VARCHAR(50) '$.properties.metricResponseType', 97 | properties_serverLatencyMs INT '$.properties.serverLatencyMs', 98 | properties_requestHeaderSize INT '$.properties.requestHeaderSize', 99 | properties_responseHeaderSize INT '$.properties.responseHeaderSize', 100 | properties_responseBodySize INT '$.properties.responseBodySize', 101 | properties_tlsVersion VARCHAR(10) '$.properties.tlsVersion', 102 | uri VARCHAR(500) '$.uri', 103 | protocol VARCHAR(50) '$.protocol', 104 | resourceType VARCHAR(250) '$.resourceType' 105 | ) 106 | -------------------------------------------------------------------------------- /querystoragelogs/sqlquerystoragelogs.sql: -------------------------------------------------------------------------------- 1 | /** 2 | this is example usage of the view in sqlcreatequeryviewforstoragelogs 3 | amend the OPENROWSET values accordingly and also the transformation on the column name such as "EventDate" to suit folder partition schema 4 | **/ 5 | 6 | --aggregate by the source EventMonth and show how many unique files were scanned 7 | SELECT 8 | statusText, 9 | CAST(REPLACE(SUBSTRING(uri,PATINDEX('%EventMonth=%',uri)+11,2),'/','') AS TINYINT) AS URIFolderMonth, 10 | COUNT(DISTINCT uri) AS FileScanCount 11 | FROM dbo.vwAnalyseLogs 12 | WHERE LogYear = 2022 13 | AND LogMonth = '07' 14 | AND LogDay = '20' 15 | AND LogHour = '20' 16 | AND operationName = 'ReadFile' 17 | AND identity_delegatedResource_resourceId LIKE '%dhsynapsews%' --synapse workspace 18 | GROUP BY 19 | statusText, 20 | CAST(REPLACE(SUBSTRING(uri,PATINDEX('%EventMonth=%',uri)+11,2),'/','') AS TINYINT) 21 | ORDER BY 2 22 | 23 | --aggregate by the source EventMonth and EventDate folder and show how many unique files were scanned 24 | SELECT 25 | statusText, 26 | CAST(REPLACE(SUBSTRING(uri,PATINDEX('%EventMonth=%',uri)+11,2),'/','') AS TINYINT) AS URIFolderMonth, 27 | SUBSTRING(uri,PATINDEX('%EventDate=%',uri)+10,10) AS URIFolderDate, 28 | COUNT(DISTINCT uri) AS FileScanCount 29 | FROM dbo.vwAnalyseLogs 30 | WHERE LogYear = 2022 31 | AND LogMonth = '07' 32 | AND LogDay = '21' 33 | AND LogHour = '12' 34 | AND operationName = 'ReadFile' 35 | AND identity_delegatedResource_resourceId LIKE '%dhsynapsews%' 36 | GROUP BY 37 | statusText, 38 | CAST(REPLACE(SUBSTRING(uri,PATINDEX('%EventMonth=%',uri)+11,2),'/','') AS TINYINT), 39 | SUBSTRING(uri,PATINDEX('%EventDate=%',uri)+10,10) 40 | ORDER BY 3 41 | -------------------------------------------------------------------------------- /showfileerrors.sql: -------------------------------------------------------------------------------- 1 | CREATE VIEW dbo.ShowFileErrors 2 | AS 3 | SELECT 4 | [Error], 5 | [Row], 6 | [Column], 7 | ColumnName, 8 | Value, 9 | [File], 10 | rowdata.filepath(1) AS ErrorFolderName 11 | FROM OPENROWSET( 12 | BULK '/_rejectedrows/*/error.json', 13 | DATA_SOURCE = '', 14 | FORMAT = 'CSV', 15 | FIELDTERMINATOR ='0x0b', 16 | FIELDQUOTE = '0x0b', 17 | ROWTERMINATOR = '0x0b' 18 | ) WITH (doc NVARCHAR(MAX)) AS rowdata 19 | CROSS APPLY openjson (doc) 20 | WITH ( [Error] VARCHAR(1000) '$.Error', 21 | [Row] INT '$.Row', 22 | [Column] INT '$.Column', 23 | ColumnName VARCHAR(1000) '$.ColumnName', 24 | Value VARCHAR(1000) '$.Value', 25 | [File] VARCHAR(1000) '$.File') 26 | --------------------------------------------------------------------------------