├── README.md
├── createaggregateswithcetas
├── createaggregateswithcetas.sql
└── triggeraggregatescetassp
├── createview
├── DataLakeMetaData.csv
├── README.txt
├── spGenerateViews.sql
├── spTriggerGenerateViews.sql
└── vwViewMetadata.sql
├── createviewsdynamically
├── dataprocessed.sql
├── dependencies
└── ObjectDependencies.sql
├── externaltablemetadata.sql
├── querystoragelogs
├── sqlcreatequeryviewforstoragelogs
└── sqlquerystoragelogs.sql
└── showfileerrors.sql
/README.md:
--------------------------------------------------------------------------------
1 | # serverlesssqlpooltools (sspt)
2 | SQL scripts for Azure Synapse Analytics Serverless SQL Pools
3 |
4 | These are utility scripts for various operations in Azure Synapse Analytics Serverless SQL Pools.
5 |
6 | ---
7 |
8 | ## createaggregateswithcetas
9 | This is a dynamic CREATE EXTERNAL TABLE AS SELECT process to write data out to an Azure Data Lake Gen2 account. This is the code that supports blog post https://www.serverlesssql.com/optimisation/using-cetas-to-create-pre-calculated-datasets/
10 |
11 |
12 | ## createview
13 | This is a dynamic CREATE VIEW process which loads an external metadata CSV file then iterates and creates the appropriate views
14 |
15 |
16 | ## querystoragelogs
17 | This is code to create a Serverless SQL Pools LogAnalytics database and then a View over the Azure Storage log format. Supports blog post https://www.serverlesssql.com/optimisation/use-azure-storage-logs-to-analyse-synapse-analytics-serverless-sql-pools-activity/
18 |
19 |
20 | ## createviewsdynamically.sql
21 | Basic SQL statement which can be used to construct a CREATE VIEW statement dynamically using file metadata extraction from sp_describe_first_result_set.
22 |
23 |
24 | ## dataprocessed.sql
25 | Shows the data processed metric vs the daily/weekly/monthly TB (terabytes) limits set.
26 |
27 |
28 | ## externaltablemetadata.sql
29 | Shows the metadata attached to an External Table such as file formats and data source.
30 |
31 |
32 | ## showerrorfiles.sql
33 | Creates a view to show errors logged as part of the OPENROWSET ERRORFILE_LOCATION and ERRORFILE_DATA_SOURCE error handling process.
34 |
35 |
36 | ## Create Aggregate Datasets using CETAS
37 | In the folder **createaggregateswithcetas** is a stored procedure that when passed a View name, will select * from the view and write back to a new folder in the Data Lake. It also creates a new view over this data and only selects the current data.
38 |
--------------------------------------------------------------------------------
/createaggregateswithcetas/createaggregateswithcetas.sql:
--------------------------------------------------------------------------------
1 | CREATE PROCEDURE LDW.GeneratePreComputedDatasets
2 | @SourceView NVARCHAR(200),
3 | @Location NVARCHAR(1000),
4 | @DataSource NVARCHAR(100),
5 | @FileFormat NVARCHAR(100)
6 | AS
7 |
8 | BEGIN
9 |
10 | --declare variables,set the process date for the folder name, and set the locations
11 | DECLARE @LocationFull NVARCHAR(1100),
12 | @LocationTopLevel NVARCHAR(1100),
13 | @ProcessDate NCHAR(16),
14 | @SQLDrop NVARCHAR(2000),
15 | @CreateExternalTableString NVARCHAR(2000),
16 | @CreateCurrentAggregateView NVARCHAR(2000)
17 |
18 | SET @ProcessDate = FORMAT(GETDATE(),'yyyyMMddHHmmss')
19 |
20 | SET @LocationFull = CONCAT(@Location,REPLACE(@SourceView,'.',''),'/',@ProcessDate)
21 | SET @LocationTopLevel = CONCAT(@Location,REPLACE(@SourceView,'.',''))
22 |
23 | --Check for existence of an external table and drop if found
24 | SET @SQLDrop = 'IF OBJECT_ID(''' + REPLACE(@SourceView,'.vw','') + '_PreComputeTable'') IS NOT NULL BEGIN DROP EXTERNAL TABLE ' + REPLACE(@SourceView,'.vw','') + '_PreComputeTable END'
25 |
26 | EXEC sp_executesql @SQLDrop
27 |
28 | --generate the SQL script to create the external table and export the View data
29 | SET @CreateExternalTableString =
30 | 'CREATE EXTERNAL TABLE ' + REPLACE(@SourceView,'.vw','') + '_PreComputeTable
31 | WITH
32 | (
33 | LOCATION = ''' + @LocationFull + ''',
34 | DATA_SOURCE = ' + @DataSource + ',
35 | FILE_FORMAT = ' + @FileFormat + '
36 | )
37 | AS
38 | SELECT
39 | *
40 | FROM ' + @SourceView
41 |
42 | EXEC sp_executesql @CreateExternalTableString
43 |
44 |
45 | --drop the external table as we do not need it, it is only being used to generate the data. We'll use a View to select the data
46 | SET @SQLDrop =
47 | 'IF OBJECT_ID(''' + REPLACE(@SourceView,'.vw','') + '_PreComputeTable'') IS NOT NULL BEGIN DROP EXTERNAL TABLE ' + REPLACE(@SourceView,'.vw','') + '_PreComputeTable END'
48 |
49 | EXEC sp_executesql @SQLDrop
50 |
51 |
52 | --drop the existing precompute View, we will re-create it
53 | SET @SQLDrop =
54 | 'IF OBJECT_ID(''' + @SourceView + '_PreComputeCurrent'') IS NOT NULL BEGIN DROP VIEW ' + @SourceView + '_PreComputeCurrent END'
55 |
56 | EXEC sp_executesql @SQLDrop
57 |
58 | --create a view to show the current data
59 | SET @CreateCurrentAggregateView =
60 | 'CREATE VIEW ' + @SourceView + '_PreComputeCurrent
61 | AS
62 | WITH CurrentFolder
63 | AS
64 | (
65 | SELECT MAX(fct.filepath(1)) AS CurrentAggregates
66 | FROM
67 | OPENROWSET
68 | (
69 | BULK ''' + @LocationTopLevel + '/*/*.parquet'',
70 | DATA_SOURCE = ''' + @DataSource + ''',
71 | FORMAT = ''Parquet''
72 | ) AS fct
73 | )
74 | SELECT fct.filepath(1) AS CurrentAggregateFolder,
75 | *
76 | FROM
77 | OPENROWSET
78 | (
79 | BULK ''' + @LocationTopLevel + '/*/*.parquet'',
80 | DATA_SOURCE = ''' + @DataSource + ''',
81 | FORMAT = ''Parquet''
82 | ) AS fct
83 | WHERE fct.filepath(1) IN (SELECT CurrentAggregates FROM CurrentFolder)'
84 |
85 | EXEC sp_executesql @CreateCurrentAggregateView
86 |
87 | END;
88 |
--------------------------------------------------------------------------------
/createaggregateswithcetas/triggeraggregatescetassp:
--------------------------------------------------------------------------------
1 | EXEC LDW.GeneratePreComputedDatasets
2 | @SourceView = 'LDW.vwFactWebTelemetryAggregate',
3 | @Location = 'precomputeddatasets/dataminutes2/',
4 | @DataSource = 'ExternalDataSourceDataLakeMI',
5 | @FileFormat = 'SynapseParquetFormat'
6 |
--------------------------------------------------------------------------------
/createview/DataLakeMetaData.csv:
--------------------------------------------------------------------------------
1 | FileFormat|HeaderRow|FieldTerminator|ViewName|ExternalDataSourceDataLake|Location|FolderHierarchyDepth|MaxVarcharValue
2 | Parquet|||LDW.vwCustomer|ExternalDataSourceDataLake|webconformed/cleansed/customer/*/*.parquet|1|255
3 | Parquet|||LDW.vwProductCategory|ExternalDataSourceDataLake|webconformed/cleansed/productcategory/*/*.parquet|1|255
4 | Parquet|||LDW.vwProductModel|ExternalDataSourceDataLake|webconformed/cleansed/productmodel/*/*.parquet|1|255
5 | Parquet|||LDW.vwProducts|ExternalDataSourceDataLake|webconformed/cleansed/products/*/*.parquet|1|255
6 | Parquet|||LDW.vwFactTelemetry|ExternalDataSourceDataLake|webvisitmessagesoptimised/EventYear=*/EventMonth=*/EventDateTime=*/*.parquet|3|50
7 | Parquet|||LDW.vwDimDate|ExternalDataSourceDataLake|conformed/dimensions/dimdate/*.parquet|0|50
--------------------------------------------------------------------------------
/createview/README.txt:
--------------------------------------------------------------------------------
1 | This process assumes that an existing Serverless SQL Pools database exists with data sources and file formats created.
2 |
3 | This process currently supports Parquet and Delimited files.
4 |
5 | To run the process:
6 |
7 | 1. Populate the DataLakeMetaData.csv metadata file with the relevant information and upload to a folder in an Azure Data lake Gen2 container
8 | 2. Run the vwViewMetadata SQL script to create the View. Ensure the BULK and DATA_SOURCE reference the relevant location and data source for the metadata file
9 | 3. Run the spGenerateViews SQL script to create the stored procedure
10 | 4. Run the spTriggerGenerateViews SQL script to create the stored procedure
11 | 5. Execute the spTriggerGenerateViews stored procedure to generate the views
12 |
--------------------------------------------------------------------------------
/createview/spGenerateViews.sql:
--------------------------------------------------------------------------------
1 | CREATE PROC dbo.spGenerateViews
2 | @FileFormat NVARCHAR(50),
3 | @HeaderRow NVARCHAR(5),
4 | @FieldTerminator NCHAR(1),
5 | @ViewName NCHAR(50),
6 | @ExternalDataSourceDataLake NVARCHAR(255),
7 | @Location NVARCHAR(255),
8 | @FolderHierarchyDepth TINYINT,
9 | @DropView BIT,
10 | @MaxVarcharValue SMALLINT
11 | AS
12 | BEGIN
13 |
14 | --System variables
15 | DECLARE @FileFormatString NVARCHAR(255),
16 | @SchemaSQL NVARCHAR(MAX);
17 |
18 | ------------------------------------
19 | IF @FileFormat = 'Parquet'
20 | BEGIN
21 | SET @FileFormatString = 'FORMAT = ''Parquet'''
22 | END
23 | ELSE
24 | BEGIN
25 |
26 | SET @FileFormatString = '
27 | FORMAT = '''+ @FileFormat + ''',
28 | PARSER_VERSION = ''2.0'',
29 | HEADER_ROW = ' + @HeaderRow + ',
30 | FIELDTERMINATOR ='''+ @FieldTerminator + ''''
31 | END
32 |
33 | CREATE TABLE #t (
34 | is_hidden bit NOT NULL,
35 | column_ordinal int NOT NULL,
36 | name sysname NULL,
37 | is_nullable bit NOT NULL,
38 | system_type_id int NOT NULL,
39 | system_type_name nvarchar(256) NULL,
40 | max_length smallint NOT NULL,
41 | [precision] tinyint NOT NULL,
42 | scale tinyint NOT NULL,
43 | collation_name sysname NULL,
44 | user_type_id int NULL,
45 | user_type_database sysname NULL,
46 | user_type_schema sysname NULL,
47 | user_type_name sysname NULL,
48 | assembly_qualified_type_name nvarchar(4000),
49 | xml_collection_id int NULL,
50 | xml_collection_database sysname NULL,
51 | xml_collection_schema sysname NULL,
52 | xml_collection_name sysname NULL,
53 | is_xml_document bit NOT NULL,
54 | is_case_sensitive bit NOT NULL,
55 | is_fixed_length_clr_type bit NOT NULL,
56 | source_server nvarchar(128),
57 | source_database nvarchar(128),
58 | source_schema nvarchar(128),
59 | source_table nvarchar(128),
60 | source_column nvarchar(128),
61 | is_identity_column bit NULL,
62 | is_part_of_unique_key bit NULL,
63 | is_updateable bit NULL,
64 | is_computed_column bit NULL,
65 | is_sparse_column_set bit NULL,
66 | ordinal_in_order_by_list smallint NULL,
67 | order_by_list_length smallint NULL,
68 | order_by_is_descending smallint NULL,
69 | tds_type_id int NOT NULL,
70 | tds_length int NOT NULL,
71 | tds_collation_id int NULL,
72 | tds_collation_sort_id tinyint NULL
73 | )
74 |
75 | SET @SchemaSQL = CAST('
76 | SELECT * FROM
77 | OPENROWSET
78 | (
79 | BULK ''' + CAST(@Location AS NVARCHAR(MAX)) + ''',
80 | DATA_SOURCE = ''' + CAST(@ExternalDataSourceDataLake AS NVARCHAR(MAX)) + ''','
81 | + CAST(@FileFormatString AS NVARCHAR(MAX)) + '
82 |
83 | ) AS fct' AS NVARCHAR(MAX))
84 |
85 | INSERT INTO #t EXEC sp_describe_first_result_set @tsql = @SchemaSQL
86 |
87 | DECLARE @mincol INT,
88 | @maxcol INT,
89 | @sqltext NVARCHAR(4000)
90 |
91 | SELECT @mincol = MIN(column_ordinal) FROM #t
92 | SELECT @maxcol = MAX(column_ordinal) FROM #t
93 |
94 | set @sqltext = 'CREATE VIEW ' + @ViewName + ' AS SELECT '
95 |
96 | WHILE @mincol <= @maxcol
97 | BEGIN
98 | SELECT @sqltext = @sqltext + CONCAT('CAST('
99 | ,[name]
100 | ,' AS '
101 | ,CASE WHEN system_type_name LIKE 'varchar%' THEN 'varchar(' + CAST(@MaxVarcharValue AS NVARCHAR(5)) + ')' ELSE system_type_name END + ') AS '
102 | ,[name]
103 | ,CASE WHEN @mincol = @maxcol THEN '' ELSE ',' END)
104 | FROM #t
105 | WHERE column_ordinal = @mincol
106 |
107 | SET @mincol = @mincol +1
108 | END
109 |
110 | DECLARE @Loop INT = 1,
111 | @sqlhierarchytext NVARCHAR(500) = ''
112 |
113 | WHILE @Loop <= @FolderHierarchyDepth
114 | BEGIN
115 | SET @sqlhierarchytext = @sqlhierarchytext + ', fct.filepath('+ CAST(@Loop AS VARCHAR(10)) + ') AS FilePath' + CAST(@Loop AS VARCHAR(10))
116 |
117 | SET @Loop = @Loop + 1
118 | END
119 |
120 | SET @sqltext = @sqltext + @sqlhierarchytext + ' FROM
121 | OPENROWSET
122 | (
123 | BULK ''' + @Location + ''',
124 | DATA_SOURCE = ''' + @ExternalDataSourceDataLake + ''','
125 | + @FileFormatString + '
126 |
127 | ) AS fct'
128 |
129 | --DROP VIEW
130 | IF @DropView = 1
131 | BEGIN
132 |
133 | IF OBJECT_ID (@ViewName, N'V') IS NOT NULL
134 | BEGIN
135 | DECLARE @DropSQL NVARCHAR(255)
136 |
137 | SET @DropSQL = 'DROP VIEW ' + @ViewName
138 |
139 | EXEC sp_executesql @tsl = @DropSQL
140 | END
141 | END
142 |
143 | --CREATE VIEW
144 | EXEC sp_executesql @tsl = @sqltext
145 |
146 | END
147 |
148 | GO
149 |
--------------------------------------------------------------------------------
/createview/spTriggerGenerateViews.sql:
--------------------------------------------------------------------------------
1 | CREATE PROC dbo.spTriggerGenerateViews @DropView BIT
2 | AS
3 | BEGIN
4 |
5 | DECLARE @LoopCountMax INT = (SELECT COUNT(*) FROM LDW.vwViewMetadata),
6 | @LoopCount INT = 1,
7 | @ExternalDataSourceDataLake NVARCHAR(255),
8 | @FileFormat NVARCHAR(50),
9 | @Location NVARCHAR(255),
10 | @HeaderRow NVARCHAR(5),
11 | @FieldTerminator NCHAR(1),
12 | @ViewName NCHAR(50),
13 | @FolderHierarchyDepth TINYINT,
14 | @MaxVarcharValue SMALLINT
15 |
16 | WHILE @LoopCount <= @LoopCountMax
17 | BEGIN
18 |
19 | SELECT @FileFormat = FileFormat,
20 | @HeaderRow = ISNULL(HeaderRow,''),
21 | @FieldTerminator = ISNULL([FieldTerminator],''),
22 | @ViewName = ViewName,
23 | @ExternalDataSourceDataLake = ExternalDataSourceDataLake,
24 | @Location = [Location],
25 | @FolderHierarchyDepth = FolderHierarchyDepth,
26 | @MaxVarcharValue = MaxVarcharValue
27 | FROM LDW.vwViewMetadata
28 | WHERE ViewCreationOrder = @LoopCount
29 |
30 | EXEC LDW.GenerateViews
31 | @FileFormat,
32 | @HeaderRow,
33 | @FieldTerminator,
34 | @ViewName,
35 | @ExternalDataSourceDataLake,
36 | @Location,
37 | @FolderHierarchyDepth,
38 | @DropView,
39 | @MaxVarcharValue
40 |
41 | SET @LoopCount = @LoopCount + 1
42 |
43 | END
44 |
45 | END
46 |
--------------------------------------------------------------------------------
/createview/vwViewMetadata.sql:
--------------------------------------------------------------------------------
1 | CREATE VIEW dbo.vwViewMetadata
2 | AS
3 | SELECT
4 | ROW_NUMBER() OVER (ORDER BY ViewName) AS ViewCreationOrder,
5 | FileFormat,
6 | HeaderRow,
7 | [FieldTerminator],
8 | ViewName,
9 | ExternalDataSourceDataLake,
10 | [Location],
11 | FolderHierarchyDepth,
12 | MaxVarcharValue
13 | FROM
14 | OPENROWSET
15 | (
16 | BULK 'metadata/DataLakeMetaData.csv',
17 | DATA_SOURCE = 'ExternalDataSourceDataLake',
18 | FORMAT = 'CSV',
19 | PARSER_VERSION = '2.0',
20 | HEADER_ROW = TRUE,
21 | FIELDTERMINATOR ='|'
22 | ) rwt
23 |
--------------------------------------------------------------------------------
/createviewsdynamically:
--------------------------------------------------------------------------------
1 | ----05.
2 | CREATE TABLE #t (
3 | is_hidden bit NOT NULL,
4 | column_ordinal int NOT NULL,
5 | name sysname NULL,
6 | is_nullable bit NOT NULL,
7 | system_type_id int NOT NULL,
8 | system_type_name nvarchar(256) NULL,
9 | max_length smallint NOT NULL,
10 | precision tinyint NOT NULL,
11 | scale tinyint NOT NULL,
12 | collation_name sysname NULL,
13 | user_type_id int NULL,
14 | user_type_database sysname NULL,
15 | user_type_schema sysname NULL,
16 | user_type_name sysname NULL,
17 | assembly_qualified_type_name nvarchar(4000),
18 | xml_collection_id int NULL,
19 | xml_collection_database sysname NULL,
20 | xml_collection_schema sysname NULL,
21 | xml_collection_name sysname NULL,
22 | is_xml_document bit NOT NULL,
23 | is_case_sensitive bit NOT NULL,
24 | is_fixed_length_clr_type bit NOT NULL,
25 | source_server nvarchar(128),
26 | source_database nvarchar(128),
27 | source_schema nvarchar(128),
28 | source_table nvarchar(128),
29 | source_column nvarchar(128),
30 | is_identity_column bit NULL,
31 | is_part_of_unique_key bit NULL,
32 | is_updateable bit NULL,
33 | is_computed_column bit NULL,
34 | is_sparse_column_set bit NULL,
35 | ordinal_in_order_by_list smallint NULL,
36 | order_by_list_length smallint NULL,
37 | order_by_is_descending smallint NULL,
38 | tds_type_id int NOT NULL,
39 | tds_length int NOT NULL,
40 | tds_collation_id int NULL,
41 | tds_collation_sort_id tinyint NULL
42 | )
43 | GO
44 |
45 | INSERT INTO #t EXEC sp_describe_first_result_set N'
46 | SELECT * FROM
47 | OPENROWSET
48 | (
49 | BULK ''sourcedatasystem/Sales_Customers/*.csv'',
50 | DATA_SOURCE = ''ExternalDataSourceDataLake'',
51 | FORMAT = ''CSV'',
52 | PARSER_VERSION = ''2.0'',
53 | HEADER_ROW = TRUE,
54 | FIELDTERMINATOR =''|''
55 | ) AS fct'
56 |
57 | DECLARE @mincol INT,
58 | @maxcol INT,
59 | @sqltext NVARCHAR(4000)
60 |
61 | SELECT @mincol = MIN(column_ordinal) FROM #t
62 | SELECT @maxcol = MAX(column_ordinal) FROM #t
63 |
64 | set @sqltext = 'CREATE VIEW LDW.vwSalesV2' + CHAR(10) + CHAR(13) + 'AS' + CHAR(10) + CHAR(13) + 'SELECT '
65 |
66 | WHILE @mincol <= @maxcol
67 | BEGIN
68 | SELECT @sqltext = @sqltext + CONCAT('CAST(',[name],' AS ',system_type_name,') AS ',[name],CASE WHEN @mincol = @maxcol THEN '' ELSE ',' END)
69 | FROM #t
70 | WHERE column_ordinal = @mincol
71 |
72 | SET @mincol = @mincol +1
73 | END
74 |
75 | SET @sqltext = @sqltext + ' FROM
76 | OPENROWSET
77 | (
78 | BULK ''sourcedatasystem/Sales_Customers/*.csv'',
79 | DATA_SOURCE = ''ExternalDataSourceDataLake'',
80 | FORMAT = ''CSV'',
81 | PARSER_VERSION = ''2.0'',
82 | HEADER_ROW = TRUE,
83 | FIELDTERMINATOR =''|''
84 | ) AS fct'
85 |
86 | EXEC sp_executesql @tsl = @sqltext
87 |
88 |
89 |
90 |
--------------------------------------------------------------------------------
/dataprocessed.sql:
--------------------------------------------------------------------------------
1 | ;WITH DataUsage
2 | AS
3 | (
4 | SELECT [type] AS DataUsageWindow,
5 | data_processed_mb AS DataProcessedMB,
6 | CAST(data_processed_mb AS DECIMAL(10,3)) / 1000 AS DataProcessedGB,
7 | (CAST(data_processed_mb AS DECIMAL(10,3)) / 1000) / 1000 AS DataProcessedTB
8 | FROM sys.dm_external_data_processed
9 | ),
10 | DataLimit
11 | AS
12 | (
13 | SELECT [name] AS DataLimitWindow,
14 | CASE
15 | WHEN [name] LIKE '%daily%' THEN 'daily'
16 | WHEN [name] LIKE '%weekly%' THEN 'weekly'
17 | WHEN [name] LIKE '%monthly%' THEN 'monthly'
18 | END AS DataUsageWindow,
19 | value AS TBValue,
20 | CAST(value_in_use AS INT) AS TBValueInUse
21 | FROM sys.configurations
22 | WHERE [name] LIKE 'Data processed %'
23 | )
24 | SELECT DL.DataUsageWindow,
25 | DL.TBValueInUse,
26 | DU.DataProcessedMB,
27 | DU.DataProcessedGB,
28 | DU.DataProcessedTB,
29 | (100 / DL.TBValueInUse) * DU.DataProcessedTB AS PercentTBUsed
30 | FROM DataLimit DL
31 | INNER JOIN DataUsage DU ON DL.DataUsageWindow = DU.DataUsageWindow
--------------------------------------------------------------------------------
/dependencies/ObjectDependencies.sql:
--------------------------------------------------------------------------------
1 | --https://docs.microsoft.com/en-us/sql/relational-databases/views/get-information-about-a-view?view=sql-server-ver15--from
2 |
3 | SELECT OBJECT_NAME(referencing_id) AS referencing_entity_name,
4 | o.type_desc AS referencing_desciption,
5 | COALESCE(COL_NAME(referencing_id, referencing_minor_id), '(n/a)') AS referencing_minor_id,
6 | referencing_class_desc, referenced_class_desc,
7 | referenced_server_name, referenced_database_name, referenced_schema_name,
8 | referenced_entity_name,
9 | COALESCE(COL_NAME(referenced_id, referenced_minor_id), '(n/a)') AS referenced_column_name,
10 | is_caller_dependent, is_ambiguous
11 | FROM sys.sql_expression_dependencies AS sed
12 | INNER JOIN sys.objects AS o ON sed.referencing_id = o.object_id
13 | --WHERE referencing_id = OBJECT_ID(N'SCHEMA.OBJECTNAME');
14 |
--------------------------------------------------------------------------------
/externaltablemetadata.sql:
--------------------------------------------------------------------------------
1 | USE ;
2 | GO
3 |
4 | SELECT et.[name] AS TableName,
5 | et.[location] AS TableLocation,
6 | ef.[name] AS FileFormatName,
7 | ef.[format_type] AS FileFormatType,
8 | es.[name] AS DataSourceName,
9 | es.[location] AS DataSourceLocation
10 | FROM sys.external_tables et
11 | INNER JOIN sys.external_file_formats ef ON ef.file_format_id = et.file_format_id
12 | INNER JOIN sys.external_data_sources es ON es.data_source_id = et.data_source_id
13 |
--------------------------------------------------------------------------------
/querystoragelogs/sqlcreatequeryviewforstoragelogs:
--------------------------------------------------------------------------------
1 | /*
2 | Attributes available for logging are at https://docs.microsoft.com/en-us/azure/storage/blobs/monitor-blob-storage-reference
3 | */
4 |
5 | --create and configure new Serverless SQL Pools database
6 | CREATE DATABASE LogAnalysis
7 |
8 | USE LogAnalysis
9 |
10 | --encryption to allow authentication
11 | CREATE MASTER KEY ENCRYPTION BY PASSWORD = 'dsfads$%zdsfkjsdhlk456hvwegf';
12 |
13 | --ensure Synapse workspace has been added as a Storage Blob Data Reader to general purpose storage account
14 | CREATE DATABASE SCOPED CREDENTIAL DataLakeManagedIdentity
15 | WITH IDENTITY='Managed Identity'
16 |
17 | --create data source to general purpose storage account
18 | --replace with relevant value
19 | CREATE EXTERNAL DATA SOURCE ExternalDataSourceDataLakeMI
20 | WITH (
21 | LOCATION = 'https://.blob.core.windows.net/insights-logs-storageread',
22 | CREDENTIAL = DataLakeManagedIdentity
23 | );
24 |
25 | --enable support for UTF8
26 | ALTER DATABASE LogAnalysis COLLATE Latin1_General_100_BIN2_UTF8;
27 |
28 | --create View over the storage logs
29 | CREATE OR ALTER VIEW dbo.vwAnalyseLogs
30 | AS
31 | SELECT
32 | time,
33 | resourceId,
34 | category,
35 | operationName,
36 | operationVersion,
37 | schemaVersion,
38 | statusCode,
39 | statusText,
40 | durationMs,
41 | callerIpAddress,
42 | correlationId,
43 | identity_type,
44 | identity_tokenHash,
45 | [location],
46 | identity_delegatedResource_resourceId,
47 | properties_accountName,
48 | properties_serviceType,
49 | properties_objectKey,
50 | properties_metricResponseType,
51 | properties_serverLatencyMs,
52 | properties_requestHeaderSize,
53 | properties_responseHeaderSize,
54 | properties_responseBodySize,
55 | properties_tlsVersion,
56 | uri,
57 | protocol,
58 | resourceType,
59 | jsonrows.filepath(1) as SubscriptionID,
60 | jsonrows.filepath(2) as ResourceGroup,
61 | jsonrows.filepath(3) as StorageAccount,
62 | jsonrows.filepath(4) as LogYear,
63 | jsonrows.filepath(5) as LogMonth,
64 | jsonrows.filepath(6) as LogDay,
65 | jsonrows.filepath(7) as LogHour,
66 | jsonrows.filepath(8) as LogMinute
67 | FROM OPENROWSET
68 | (
69 | BULK '/resourceId=/subscriptions/*/resourceGroups/*/providers/Microsoft.Storage/storageAccounts/*/blobServices/default/y=*/m=*/d=*/h=*/m=*/*',
70 | DATA_SOURCE = 'ExternalDataSourceDataLakeMI',
71 | FORMAT = 'CSV',
72 | PARSER_VERSION = '2.0',
73 | FIELDTERMINATOR = '0x09',
74 | FIELDQUOTE = '0x0b',
75 | ROWTERMINATOR = '0x0A'
76 | ) WITH (doc NVARCHAR(4000)) AS jsonrows
77 | CROSS APPLY OPENJSON (doc)
78 | WITH ( time DATETIME2 '$.time',
79 | resourceId VARCHAR(500) '$.resourceId',
80 | category VARCHAR(50) '$.category',
81 | operationName VARCHAR(100) '$.operationName',
82 | operationVersion VARCHAR(10) '$.operationVersion',
83 | schemaVersion VARCHAR(10) '$.schemaVersion',
84 | statusCode SMALLINT '$.statusCode',
85 | statusText VARCHAR(100) '$.statusText',
86 | durationMs INT '$.durationMs',
87 | callerIpAddress VARCHAR(50) '$.callerIpAddress',
88 | correlationId VARCHAR(50) '$.correlationId',
89 | identity_type VARCHAR(100) '$.identity.type',
90 | identity_tokenHash VARCHAR(100) '$.identity.tokenHash',
91 | [location] VARCHAR(50) '$.location',
92 | identity_delegatedResource_resourceId VARCHAR(500) '$.identity.delegatedResource.resourceId',
93 | properties_accountName VARCHAR(50) '$.properties.accountName',
94 | properties_serviceType VARCHAR(30) '$.properties.serviceType',
95 | properties_objectKey VARCHAR(250) '$.properties.objectKey',
96 | properties_metricResponseType VARCHAR(50) '$.properties.metricResponseType',
97 | properties_serverLatencyMs INT '$.properties.serverLatencyMs',
98 | properties_requestHeaderSize INT '$.properties.requestHeaderSize',
99 | properties_responseHeaderSize INT '$.properties.responseHeaderSize',
100 | properties_responseBodySize INT '$.properties.responseBodySize',
101 | properties_tlsVersion VARCHAR(10) '$.properties.tlsVersion',
102 | uri VARCHAR(500) '$.uri',
103 | protocol VARCHAR(50) '$.protocol',
104 | resourceType VARCHAR(250) '$.resourceType'
105 | )
106 |
--------------------------------------------------------------------------------
/querystoragelogs/sqlquerystoragelogs.sql:
--------------------------------------------------------------------------------
1 | /**
2 | this is example usage of the view in sqlcreatequeryviewforstoragelogs
3 | amend the OPENROWSET values accordingly and also the transformation on the column name such as "EventDate" to suit folder partition schema
4 | **/
5 |
6 | --aggregate by the source EventMonth and show how many unique files were scanned
7 | SELECT
8 | statusText,
9 | CAST(REPLACE(SUBSTRING(uri,PATINDEX('%EventMonth=%',uri)+11,2),'/','') AS TINYINT) AS URIFolderMonth,
10 | COUNT(DISTINCT uri) AS FileScanCount
11 | FROM dbo.vwAnalyseLogs
12 | WHERE LogYear = 2022
13 | AND LogMonth = '07'
14 | AND LogDay = '20'
15 | AND LogHour = '20'
16 | AND operationName = 'ReadFile'
17 | AND identity_delegatedResource_resourceId LIKE '%dhsynapsews%' --synapse workspace
18 | GROUP BY
19 | statusText,
20 | CAST(REPLACE(SUBSTRING(uri,PATINDEX('%EventMonth=%',uri)+11,2),'/','') AS TINYINT)
21 | ORDER BY 2
22 |
23 | --aggregate by the source EventMonth and EventDate folder and show how many unique files were scanned
24 | SELECT
25 | statusText,
26 | CAST(REPLACE(SUBSTRING(uri,PATINDEX('%EventMonth=%',uri)+11,2),'/','') AS TINYINT) AS URIFolderMonth,
27 | SUBSTRING(uri,PATINDEX('%EventDate=%',uri)+10,10) AS URIFolderDate,
28 | COUNT(DISTINCT uri) AS FileScanCount
29 | FROM dbo.vwAnalyseLogs
30 | WHERE LogYear = 2022
31 | AND LogMonth = '07'
32 | AND LogDay = '21'
33 | AND LogHour = '12'
34 | AND operationName = 'ReadFile'
35 | AND identity_delegatedResource_resourceId LIKE '%dhsynapsews%'
36 | GROUP BY
37 | statusText,
38 | CAST(REPLACE(SUBSTRING(uri,PATINDEX('%EventMonth=%',uri)+11,2),'/','') AS TINYINT),
39 | SUBSTRING(uri,PATINDEX('%EventDate=%',uri)+10,10)
40 | ORDER BY 3
41 |
--------------------------------------------------------------------------------
/showfileerrors.sql:
--------------------------------------------------------------------------------
1 | CREATE VIEW dbo.ShowFileErrors
2 | AS
3 | SELECT
4 | [Error],
5 | [Row],
6 | [Column],
7 | ColumnName,
8 | Value,
9 | [File],
10 | rowdata.filepath(1) AS ErrorFolderName
11 | FROM OPENROWSET(
12 | BULK '/_rejectedrows/*/error.json',
13 | DATA_SOURCE = '',
14 | FORMAT = 'CSV',
15 | FIELDTERMINATOR ='0x0b',
16 | FIELDQUOTE = '0x0b',
17 | ROWTERMINATOR = '0x0b'
18 | ) WITH (doc NVARCHAR(MAX)) AS rowdata
19 | CROSS APPLY openjson (doc)
20 | WITH ( [Error] VARCHAR(1000) '$.Error',
21 | [Row] INT '$.Row',
22 | [Column] INT '$.Column',
23 | ColumnName VARCHAR(1000) '$.ColumnName',
24 | Value VARCHAR(1000) '$.Value',
25 | [File] VARCHAR(1000) '$.File')
26 |
--------------------------------------------------------------------------------