├── 06_RefreshPowerBIDataset ├── README.md └── pbi_refresh.py ├── 02_PipelineExecute ├── 02_PipelineExecute.csproj ├── 02_PipelineExecute.sln ├── Program.cs └── README.md ├── 03_CopyBlobOneLake ├── 03_CopyBlobOneLake.csproj ├── 03_CopyBlobOneLake.sln ├── Program.cs └── README.md ├── 04_PauseFabricCapacity ├── 04_PauseFabricCapacity.csproj ├── 04_PauseFabricCapacity.sln ├── Program.cs └── README.md ├── 05_ResumeFabricCapacity ├── 05_ResumeFabricCapacity.csproj ├── 05_ResumeFabricCapacity.sln ├── Program.cs └── README.md ├── 01_SqlClientAAD ├── 01_SqlClientAAD │ ├── 01_SqlClientAAD.csproj │ └── Program.cs ├── 01_SqlClientAAD.sln └── README.md ├── 09_sp_WhoIsActive └── README.md ├── README.md ├── 10_SqlEndPoint └── SQL Endpoint.py ├── 07_SharePoint ├── README.md ├── Ingest-SP.ipynb └── builtin │ └── sharepoint.py ├── 11_RestartMirror └── Restart-Meta.ipynb ├── 08_Struct └── TestStructType7GB.ipynb └── .gitignore /06_RefreshPowerBIDataset/README.md: -------------------------------------------------------------------------------- 1 | ## 06_refreshPowerBiDataset 2 | Example of using Python API to refresh PowerBI datasets within workspaces. 
3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /02_PipelineExecute/02_PipelineExecute.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | net6.0 6 | _02_PipelineExecute 7 | enable 8 | enable 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /03_CopyBlobOneLake/03_CopyBlobOneLake.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | net6.0 6 | _03_CopyBlobOneLake 7 | enable 8 | enable 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /04_PauseFabricCapacity/04_PauseFabricCapacity.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | net6.0 6 | _04_PauseFabricCapacity 7 | enable 8 | enable 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /05_ResumeFabricCapacity/05_ResumeFabricCapacity.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | net6.0 6 | _05_ResumeFabricCapacity 7 | enable 8 | enable 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /01_SqlClientAAD/01_SqlClientAAD/01_SqlClientAAD.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | net6.0 6 | _01_SqlClientAAD 7 | enable 8 | enable 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /09_sp_WhoIsActive/README.md: -------------------------------------------------------------------------------- 1 | ## 09 FabricDW sp_WhoIsActive 2 | Monitoring TSQL Script for Fabric DW with similar output to https://whoisactive.com/ 
which is created and maintained by fellow MCM 3 | Adam Machanic, - probably the most widely used and known 3rd Party scripot for SqlDbEngine 4 | 5 | If you haven't installed sp_WhoIsActive on all your SqlEbEngine, then its a must install. Adam's code is a master class in 6 | design, TSQL coding and extensibility. A lesson in how to go from a basic script to a swiss army knife for instant performance and monitoring ;-) 7 | 8 | This Fabric DW script just shows exec requests, sessions and SQL Statements. When coming from SqlDbEngine this adds some familiarity to monitoring. 9 | 10 | This has moved to a dedicated repos as below 11 | https://github.com/ProdataSQL/FabricWhoIsActive 12 | 13 | -------------------------------------------------------------------------------- /02_PipelineExecute/02_PipelineExecute.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.5.33502.453 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "02_PipelineExecute", "02_PipelineExecute.csproj", "{B0D7A2D3-F86A-489C-BCF5-1F55A61DD881}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Any CPU = Debug|Any CPU 11 | Release|Any CPU = Release|Any CPU 12 | EndGlobalSection 13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | {B0D7A2D3-F86A-489C-BCF5-1F55A61DD881}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 15 | {B0D7A2D3-F86A-489C-BCF5-1F55A61DD881}.Debug|Any CPU.Build.0 = Debug|Any CPU 16 | {B0D7A2D3-F86A-489C-BCF5-1F55A61DD881}.Release|Any CPU.ActiveCfg = Release|Any CPU 17 | {B0D7A2D3-F86A-489C-BCF5-1F55A61DD881}.Release|Any CPU.Build.0 = Release|Any CPU 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = preSolution 20 | HideSolutionNode = FALSE 21 | EndGlobalSection 22 | GlobalSection(ExtensibilityGlobals) = 
postSolution 23 | SolutionGuid = {462262E5-6AE8-437E-9FA7-AF506F2C2A48} 24 | EndGlobalSection 25 | EndGlobal 26 | -------------------------------------------------------------------------------- /03_CopyBlobOneLake/03_CopyBlobOneLake.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.5.33530.505 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "03_CopyBlobOneLake", "03_CopyBlobOneLake.csproj", "{76F21D78-CAF3-4D5C-9DF1-A491F608F0BD}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Any CPU = Debug|Any CPU 11 | Release|Any CPU = Release|Any CPU 12 | EndGlobalSection 13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | {76F21D78-CAF3-4D5C-9DF1-A491F608F0BD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 15 | {76F21D78-CAF3-4D5C-9DF1-A491F608F0BD}.Debug|Any CPU.Build.0 = Debug|Any CPU 16 | {76F21D78-CAF3-4D5C-9DF1-A491F608F0BD}.Release|Any CPU.ActiveCfg = Release|Any CPU 17 | {76F21D78-CAF3-4D5C-9DF1-A491F608F0BD}.Release|Any CPU.Build.0 = Release|Any CPU 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = preSolution 20 | HideSolutionNode = FALSE 21 | EndGlobalSection 22 | GlobalSection(ExtensibilityGlobals) = postSolution 23 | SolutionGuid = {E447DE45-BEDF-4BC9-BD1A-5195647EAFC6} 24 | EndGlobalSection 25 | EndGlobal 26 | -------------------------------------------------------------------------------- /01_SqlClientAAD/01_SqlClientAAD.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.2.32616.157 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "01_SqlClientAAD", 
"01_SqlClientAAD\01_SqlClientAAD.csproj", "{D597459D-D054-4461-BC2B-218D201FEEEF}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Any CPU = Debug|Any CPU 11 | Release|Any CPU = Release|Any CPU 12 | EndGlobalSection 13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | {D597459D-D054-4461-BC2B-218D201FEEEF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 15 | {D597459D-D054-4461-BC2B-218D201FEEEF}.Debug|Any CPU.Build.0 = Debug|Any CPU 16 | {D597459D-D054-4461-BC2B-218D201FEEEF}.Release|Any CPU.ActiveCfg = Release|Any CPU 17 | {D597459D-D054-4461-BC2B-218D201FEEEF}.Release|Any CPU.Build.0 = Release|Any CPU 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = preSolution 20 | HideSolutionNode = FALSE 21 | EndGlobalSection 22 | GlobalSection(ExtensibilityGlobals) = postSolution 23 | SolutionGuid = {6FE212F5-4A84-4F14-9DC1-212B4E0E5A2B} 24 | EndGlobalSection 25 | EndGlobal 26 | -------------------------------------------------------------------------------- /04_PauseFabricCapacity/04_PauseFabricCapacity.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.5.33530.505 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "04_PauseFabricCapacity", "04_PauseFabricCapacity.csproj", "{331E394A-B993-4EB6-BCA1-AABFBA4C6346}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Any CPU = Debug|Any CPU 11 | Release|Any CPU = Release|Any CPU 12 | EndGlobalSection 13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | {331E394A-B993-4EB6-BCA1-AABFBA4C6346}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 15 | {331E394A-B993-4EB6-BCA1-AABFBA4C6346}.Debug|Any CPU.Build.0 = Debug|Any CPU 16 | {331E394A-B993-4EB6-BCA1-AABFBA4C6346}.Release|Any 
CPU.ActiveCfg = Release|Any CPU 17 | {331E394A-B993-4EB6-BCA1-AABFBA4C6346}.Release|Any CPU.Build.0 = Release|Any CPU 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = preSolution 20 | HideSolutionNode = FALSE 21 | EndGlobalSection 22 | GlobalSection(ExtensibilityGlobals) = postSolution 23 | SolutionGuid = {54BBEC92-7911-40EA-9FD4-34FA61B6445F} 24 | EndGlobalSection 25 | EndGlobal 26 | -------------------------------------------------------------------------------- /05_ResumeFabricCapacity/05_ResumeFabricCapacity.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.5.33530.505 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "05_ResumeFabricCapacity", "05_ResumeFabricCapacity.csproj", "{AF057D86-8FDA-4992-A4B4-ACA88D826DF6}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Any CPU = Debug|Any CPU 11 | Release|Any CPU = Release|Any CPU 12 | EndGlobalSection 13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | {AF057D86-8FDA-4992-A4B4-ACA88D826DF6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 15 | {AF057D86-8FDA-4992-A4B4-ACA88D826DF6}.Debug|Any CPU.Build.0 = Debug|Any CPU 16 | {AF057D86-8FDA-4992-A4B4-ACA88D826DF6}.Release|Any CPU.ActiveCfg = Release|Any CPU 17 | {AF057D86-8FDA-4992-A4B4-ACA88D826DF6}.Release|Any CPU.Build.0 = Release|Any CPU 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = preSolution 20 | HideSolutionNode = FALSE 21 | EndGlobalSection 22 | GlobalSection(ExtensibilityGlobals) = postSolution 23 | SolutionGuid = {308B5C12-11EE-4592-A42F-AF36BD8F40FB} 24 | EndGlobalSection 25 | EndGlobal 26 | -------------------------------------------------------------------------------- /03_CopyBlobOneLake/Program.cs: 
-------------------------------------------------------------------------------- 1 | using Azure.Identity; 2 | using Azure.Core; 3 | using System.Net.Http.Headers; 4 | 5 | DefaultAzureCredentialOptions DefaultAzureCredentialOptions = new() 6 | { 7 | ExcludeAzureCliCredential = false, 8 | ExcludeVisualStudioCredential = false, 9 | ExcludeAzurePowerShellCredential = false 10 | }; 11 | var defaultAzureCredential= new DefaultAzureCredential(DefaultAzureCredentialOptions); 12 | string bearerToken = defaultAzureCredential.GetToken(new TokenRequestContext(new[] { "https://storage.azure.com/" })).Token; 13 | 14 | HttpClient client = new(); 15 | 16 | string rootUrl = "https://onelake.blob.fabric.microsoft.com/"; 17 | // CHANGE THESE 18 | string sourceUrl = $"{rootUrl}FabricDW [Dev]/FabricLH.Lakehouse/Files/unittest/AdventureWorks/erp/Account.csv"; 19 | string sinkUrl = $"{rootUrl}FabricDW [Dev]/FabricLH.Lakehouse/Files/landing/csv/Account.csv"; 20 | 21 | using (var request = new HttpRequestMessage(HttpMethod.Put, $"{sinkUrl}")) 22 | { 23 | request.Headers.Add("X-Ms-Copy-Source", $"{sourceUrl}"); 24 | request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", bearerToken); 25 | 26 | var response = client.SendAsync(request); 27 | 28 | response.Wait(); 29 | Console.WriteLine(response.Result); 30 | } -------------------------------------------------------------------------------- /04_PauseFabricCapacity/Program.cs: -------------------------------------------------------------------------------- 1 | using Azure.Core; 2 | using Azure.Identity; 3 | using System.Net.Http.Headers; 4 | 5 | 6 | DefaultAzureCredentialOptions DefaultAzureCredentialOptions = new() 7 | { 8 | ExcludeAzureCliCredential = false, 9 | ExcludeVisualStudioCredential = false, 10 | ExcludeAzurePowerShellCredential = false 11 | }; 12 | // Fill in specific information here: 13 | string SubscriptionId = ""; 14 | string ResourceGroupName = ""; 15 | string CapacityName = ""; 16 | 17 | string CapacityUrl = 
$"https://management.azure.com/subscriptions/{SubscriptionId}/resourceGroups/{ResourceGroupName}/providers/Microsoft.Fabric/capacities/{CapacityName}"; 18 | 19 | var defaultAzureCredential = new DefaultAzureCredential(DefaultAzureCredentialOptions); 20 | string bearerToken = defaultAzureCredential.GetToken(new TokenRequestContext(new[] { "https://management.azure.com" })).Token; 21 | 22 | HttpClient client = new(); 23 | 24 | using (var request = new HttpRequestMessage(HttpMethod.Post, $"{CapacityUrl}/suspend?api-version=2022-07-01-preview")) 25 | { 26 | request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", bearerToken); 27 | 28 | var response = client.SendAsync(request); 29 | 30 | response.Wait(); 31 | Console.WriteLine(response.Result); 32 | } -------------------------------------------------------------------------------- /05_ResumeFabricCapacity/Program.cs: -------------------------------------------------------------------------------- 1 | using Azure.Core; 2 | using Azure.Identity; 3 | using System.Net.Http.Headers; 4 | 5 | 6 | DefaultAzureCredentialOptions DefaultAzureCredentialOptions = new() 7 | { 8 | ExcludeAzureCliCredential = false, 9 | ExcludeVisualStudioCredential = false, 10 | ExcludeAzurePowerShellCredential = false 11 | }; 12 | // Fill in specific information here: 13 | string SubscriptionId = ""; 14 | string ResourceGroupName = ""; 15 | string CapacityName = ""; 16 | 17 | string CapacityUrl = $"https://management.azure.com/subscriptions/{SubscriptionId}/resourceGroups/{ResourceGroupName}/providers/Microsoft.Fabric/capacities/{CapacityName}"; 18 | 19 | var defaultAzureCredential = new DefaultAzureCredential(DefaultAzureCredentialOptions); 20 | string bearerToken = defaultAzureCredential.GetToken(new TokenRequestContext(new[] { "https://management.azure.com" })).Token; 21 | 22 | HttpClient client = new(); 23 | 24 | using (var request = new HttpRequestMessage(HttpMethod.Post, $"{CapacityUrl}/resume?api-version=2022-07-01-preview")) 
25 | { 26 | request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", bearerToken); 27 | 28 | var response = client.SendAsync(request); 29 | 30 | response.Wait(); 31 | Console.WriteLine(response.Result); 32 | } -------------------------------------------------------------------------------- /02_PipelineExecute/Program.cs: -------------------------------------------------------------------------------- 1 | using Azure.Identity; 2 | using Azure.Core; 3 | using System.Net.Http.Headers; 4 | 5 | var DefaultAzureCredentialOptions = new DefaultAzureCredentialOptions 6 | { 7 | ExcludeAzureCliCredential = false, 8 | ExcludeManagedIdentityCredential = true, 9 | ExcludeSharedTokenCacheCredential = true, 10 | ExcludeVisualStudioCredential = false, 11 | ExcludeAzurePowerShellCredential = false, 12 | ExcludeEnvironmentCredential = true, 13 | ExcludeVisualStudioCodeCredential = true, 14 | ExcludeInteractiveBrowserCredential = true 15 | }; 16 | 17 | var accessToken = new DefaultAzureCredential(DefaultAzureCredentialOptions).GetToken(new TokenRequestContext(new[] { "https://analysis.windows.net/powerbi/api/.default" })); 18 | string Token = accessToken.Token.ToString(); 19 | 20 | // constructs pipeline url 21 | string pipelineId = "0987f3e1-4f93-46f9-b43b-c53dbbc13c33"; 22 | string pipelineUrl = $"https://wabi-north-europe-redirect.analysis.windows.net/metadata/artifacts/{pipelineId}/jobs/Pipeline"; 23 | 24 | HttpClient client = new(); 25 | using (var request = new HttpRequestMessage(HttpMethod.Post, pipelineUrl)) 26 | { 27 | // attaches headers 28 | request.Headers.Add("Accept", "application/json, text/plain, */*"); 29 | request.Headers.Add("Accept-Encoding", "gzip, deflate, br"); 30 | request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", Token); 31 | 32 | var response = client.SendAsync(request); 33 | 34 | response.Wait(); 35 | Console.WriteLine(response.Result.ToString()); 36 | } 
-------------------------------------------------------------------------------- /01_SqlClientAAD/01_SqlClientAAD/Program.cs: -------------------------------------------------------------------------------- 1 | 2 | using System.Data.SqlClient; 3 | using Azure.Identity; 4 | using Azure.Core; 5 | using System.Data; 6 | 7 | var DefaultAzureCredentialOptions = new DefaultAzureCredentialOptions 8 | { 9 | ExcludeAzureCliCredential = true, 10 | ExcludeManagedIdentityCredential = true, 11 | ExcludeSharedTokenCacheCredential = true, 12 | ExcludeVisualStudioCredential = false, 13 | ExcludeAzurePowerShellCredential = true, 14 | ExcludeEnvironmentCredential = true, 15 | ExcludeVisualStudioCodeCredential = true, 16 | ExcludeInteractiveBrowserCredential = true 17 | }; 18 | 19 | var accessToken = new DefaultAzureCredential(DefaultAzureCredentialOptions).GetToken(new TokenRequestContext(new string[] { "https://database.windows.net//.default" })); 20 | var sqlServer = "fkm4vwf6l6zebg4lqrhbtdcmsq-absyvg6llsuutcc3wwyid37nou.datawarehouse.pbidedicated.windows.net"; 21 | var sqlDatabase = ""; 22 | var connectionString = $"Server={sqlServer};Database={sqlDatabase}"; 23 | 24 | //Set AAD Access Token, Open Conneciton, Run Queries and Disconnect 25 | using var con = new SqlConnection(connectionString); 26 | con.AccessToken = accessToken.Token; 27 | con.Open(); 28 | using var cmd = new SqlCommand(); 29 | cmd.Connection = con; 30 | cmd.CommandType = CommandType.Text; 31 | cmd.CommandText = "SELECT @@Version"; 32 | var res =cmd.ExecuteScalar(); 33 | 34 | con.Close(); 35 | 36 | 37 | Console.WriteLine(res); 38 | Console.ReadLine(); 39 | 40 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Fabric 2 | 3 | This contains random examples of code snippets for Microsoft Fabric. Particulary stuff we couldn't find documented. 
4 | All of these examples can use various auth methods such as Managed Identity, Visual Studio and VS Code. 5 | ## [01_SqlClientAAD](01_SqlClientAAD/) 6 | Example of using Azure.Identity with SqlClient to authenticate to a Fabric SQL EndPoint. 7 | 8 | 9 | ## [02_PipelineExecute](02_PipelineExecute/) 10 | Example of using Azure.Identity with HttpClient to authenticate to a Fabric Pipeline and execute it. 11 | This interacts with the api the same way the web client would when requesting a pipeline execution. 12 | 13 | ## [03_CopyBlobOneLake](03_CopyBlobOneLake/) 14 | Example of using Azure.Identity with HttpClient to copy a Blob across the OneLake. 15 | This is a basic example of a rest call to copy a file on a onelake. 16 | 17 | ## [04_PauseFabricCapacity](04_PauseFabricCapacity/) 18 | Example of using Azure.Identity with HttpClient to authenticate to a Fabric Capacity and Suspend it. 19 | This is the same management api as used in a web browser. Will fail if capacity is already paused. 20 | 21 | ## [05_ResumeFabricCapacity](05_ResumeFabricCapacity/) 22 | Example of using Azure.Identity with HttpClient to authenticate to a Fabric Capacity and Resume it. 23 | This is the same management api as used in a web browser. Will fail if capacity is already running. 24 | 25 | ## [06_RefreshPowerBIDataset](06_RefreshPowerBIDataset/) 26 | Example of using Python API to refresh PowerBI datasets within workspaces. 27 | 28 | ## [07_SharePoint](07_SharePoint/) 29 | Sample Fabric Notebooks and builtin python class to Integrate Sharepoint and Fabric using Graph API. 30 | 31 | ## [09_sp_WhoIsActive](09_sp_WhoIsActive/) 32 | Monitoring script for Fabric DW to show requests, sessions and blocking.
This has moved to https://github.com/ProdataSQL/FabricWhoIsActive 33 | 34 | ## [10_SqlEndPoint](10_SqlEndPoint/SQL%20Endpoint.py) 35 | Sample Notebook to connect to a Fabric SqlEndPoint using AAD Token from a Notebook with Python 36 | 37 | ## [11_RestartMirror](11_RestartMirror/) 38 | Sample Fabric Notebooks to Restart a SQL Database Mirror 39 | 40 | -------------------------------------------------------------------------------- /10_SqlEndPoint/SQL Endpoint.py: -------------------------------------------------------------------------------- 1 | dw_name="FabricDW" # Change this to your DW Name 2 | 3 | import sempy.fabric as fabric 4 | import struct 5 | import sqlalchemy 6 | import pyodbc 7 | import pandas as pd 8 | from notebookutils import mssparkutils 9 | 10 | #Function to Return sqlalchemt ODBC Engine, given a connection string and using Integrated AAD Auth to Fabric 11 | def create_engine(connection_string : str): 12 | token = mssparkutils.credentials.getToken('https://analysis.windows.net/powerbi/api').encode("UTF-16-LE") 13 | token_struct = struct.pack(f' 14 | using System.Data.SqlClient; 15 | using Azure.Identity; 16 | using Azure.Core; 17 | using System.Data; 18 | 19 | var DefaultAzureCredentialOptions = new DefaultAzureCredentialOptions 20 | { 21 | ExcludeAzureCliCredential = true, 22 | ExcludeManagedIdentityCredential = true, 23 | ExcludeSharedTokenCacheCredential = true, 24 | ExcludeVisualStudioCredential = false, 25 | ExcludeAzurePowerShellCredential = true, 26 | ExcludeEnvironmentCredential = true, 27 | ExcludeVisualStudioCodeCredential = true, 28 | ExcludeInteractiveBrowserCredential = true 29 | }; 30 | 31 | var accessToken = new DefaultAzureCredential(DefaultAzureCredentialOptions).GetToken(new TokenRequestContext(new string[] { "https://database.windows.net//.default" })); 32 | var sqlServer = "fkm4vwf6l6zebg4lqrhbtdcmsq-absyvg6llsuutcc3wwyid37nou.datawarehouse.pbidedicated.windows.net"; 33 | var sqlDatabase = ""; 34 | var connectionString = 
$"Server={sqlServer};Database={sqlDatabase}"; 35 | 36 | //Set AAD Access Token, Open Conneciton, Run Queries and Disconnect 37 | using var con = new SqlConnection(connectionString); 38 | con.AccessToken = accessToken.Token; 39 | con.Open(); 40 | using var cmd = new SqlCommand(); 41 | cmd.Connection = con; 42 | cmd.CommandType = CommandType.Text; 43 | cmd.CommandText = "SELECT @@Version"; 44 | var res =cmd.ExecuteScalar(); 45 | con.Close(); 46 | 47 | Console.WriteLine(res); 48 | 49 | 50 | -------------------------------------------------------------------------------- /07_SharePoint/README.md: -------------------------------------------------------------------------------- 1 | ## 07_SharePoint 2 | Example Fabric Notebook with SharePoint Integration an AAD Service Principle and Graph API. 3 | This allows Notebooks to seamlessly download file and folders from SharePoint. 4 | 5 | What you will need 6 | - ClientID and Secret for Service Principle (details below). Our sample assumes that the Secret is in Keyvault. 7 | - TenantID for AAD where the app regsistration has been added. 8 | - A Sharepoint Site, Library, optional Folder and some sample files 9 | - A Fabric Workspace with Notebooks 10 | 11 | **Pre-Requisite 12 | You will need to create the service principle and assign Sharepoint permissions to the service principle for the target site. 13 | This process is very well documented in the blog here 14 | https://sposcripts.com/download-files-from-sharepoint-using-graph/ 15 | 16 | Sample Code below to download files, including wildard support 17 |

# Fabric notebook cell: download files (wildcard supported) from a SharePoint
# library into the default Lakehouse via a service principal and the Graph API.
#
# Parameter cell -- these values are normally injected by the calling pipeline.
ConnectionSettings = '{"library": "Unittest", "tenant_id":"xxxxxxxx-xxxx--xxxx-xxxxxxxxxxxx","app_client_id":"app-fabricdw-dev-clientid","app_client_secret":"app-fabricdw-dev-clientsecret","keyvault":"kv-fabric-dev","sharepoint_url":"prodata365.sharepoint.com","site":"Fabric"}'
SourceSettings = '{}'
SourceDirectory = 'tst'                 # SharePoint folder to read from
TargetDirectory = 'unittest/AW/tst'     # Lakehouse Files/ subfolder to write to
SourceObject = '*.xlsx'                 # wildcard pattern of files to download
TargetFileName = ''
TargetSettings = ''

from builtin.sharepoint import Sharepoint, AuthToken
import pandas  # not used in this cell; presumably kept for downstream cells -- confirm
import json
from os.path import join
from pathlib import Path

# Guard against the pipeline passing empty strings instead of JSON documents.
SourceSettings = SourceSettings or '{}'
ConnectionSettings = ConnectionSettings or '{}'
source_connection_options = json.loads(ConnectionSettings)
source_options = json.loads(SourceSettings)

auth_token = AuthToken(**source_connection_options)
sharepoint = Sharepoint(auth_token, folder=SourceDirectory, file=SourceObject, **source_options, **source_connection_options)

# Mapping of file name -> in-memory bytes buffer of the file content.
files = sharepoint.get_file_bytes()

# The target folder is loop-invariant, so create it once up front
# (mkdir with exist_ok=True is idempotent) instead of once per file.
target_root = join("/lakehouse/default/Files/", TargetDirectory)
Path(target_root).mkdir(parents=True, exist_ok=True)

for file_name, file_bytes in files.items():
    with open(join(target_root, file_name), "wb") as f:
        f.write(file_bytes.getbuffer())
47 | 
48 | 49 | -------------------------------------------------------------------------------- /03_CopyBlobOneLake/README.md: -------------------------------------------------------------------------------- 1 | ## 03_CopyBlobOneLake 2 | Example of using Azure.Identity with HttpClient to copy a blob on OneLake. 3 | Currently tested mechanisms of authentication are AzurePowerShellCredential, VisualStudioCredential only works so far on older versions of Visual Studio components in VS 20022. 4 | 5 | 6 | Some Warnings: 7 | 1. You need to ensure that ExcludeManagedIdentityCredential is set to True if you are not using Managed Identity. 8 | Ths avoids timeouts as Azure.Identity alwasy tries Managed Identity First. 9 | 10 | 2. If doing frequent connections you need to consider caching the AccessToken. By default is valid for an hour, but re-caling the 11 | TokenRequest on each connection request can be a second or so of wasted time. 12 | 13 | 3. Make sure to change the default details in sourceUrl and sinkUrl as these are Workspace specific. 14 | 15 | 4. We are tracking a bug in the Visual Studio DLLs whereby VisualStudioCredential does not work for generating tokens on the latest version, but you can switch to AzureCliCredential and this does work. 16 | We are triaging this and will work with Microsoft Support to confirm if this is a bug, or if VisualStudioCredenital has lost some support. 17 | 18 | 19 |

using Azure.Identity;
using Azure.Core;
using System.Net.Http.Headers;

// Copy a blob within OneLake: a PUT against the sink URL carrying an
// X-Ms-Copy-Source header asks the service to copy from the source blob.

// Explicitly allow the Azure CLI, Visual Studio and Azure PowerShell
// credentials in the DefaultAzureCredential chain.
DefaultAzureCredentialOptions DefaultAzureCredentialOptions = new()
{
    ExcludeAzureCliCredential = false,
    ExcludeVisualStudioCredential = false,
    ExcludeAzurePowerShellCredential = false
};
var defaultAzureCredential = new DefaultAzureCredential(DefaultAzureCredentialOptions);

// Acquire a bearer token for the Azure Storage resource (OneLake blob endpoint).
string bearerToken = defaultAzureCredential.GetToken(new TokenRequestContext(new[] { "https://storage.azure.com/" })).Token;

// Dispose the client when the script exits.
using HttpClient client = new();

string rootUrl = "https://onelake.blob.fabric.microsoft.com/";
// CHANGE THESE: workspace / lakehouse specific paths.
string sourceUrl = $"{rootUrl}FabricDW [Dev]/FabricLH.Lakehouse/Files/unittest/AdventureWorks/erp/Account.csv";
string sinkUrl = $"{rootUrl}FabricDW [Dev]/FabricLH.Lakehouse/Files/landing/csv/Account.csv";

using (var request = new HttpRequestMessage(HttpMethod.Put, sinkUrl))
{
    request.Headers.Add("X-Ms-Copy-Source", sourceUrl);
    request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", bearerToken);

    // HttpClient.Send (synchronous, .NET 5+) replaces the original
    // SendAsync(...).Wait()/.Result pattern, which blocks on a Task and can
    // deadlock where a synchronization context exists.
    var response = client.Send(request);
    Console.WriteLine(response);
}
50 | 
51 | -------------------------------------------------------------------------------- /04_PauseFabricCapacity/README.md: -------------------------------------------------------------------------------- 1 | ## 04_Pause 2 | Example of using Azure.Identity with HttpClient to copy a blob on OneLake. 3 | Currently tested mechanisms of authentication are AzurePowerShellCredential, VisualStudioCredential only works so far on older versions of Visual Studio components in VS 20022. 4 | 5 | 6 | Some Warnings: 7 | 1. You need to ensure that ExcludeManagedIdentityCredential is set to True if you are not using Managed Identity. 8 | Ths avoids timeouts as Azure.Identity alwasy tries Managed Identity First. 9 | 10 | 2. If doing frequent connections you need to consider caching the AccessToken. By default is valid for an hour, but re-caling the 11 | TokenRequest on each connection request can be a second or so of wasted time. 12 | 13 | 3. Make sure to change the SubscriptionId, ResourceGroupName and CapacityName to relevant values. 14 | 15 | 4. We are tracking a bug in the Visual Studio DLLs whereby VisualStudioCredential does not work for generating tokens on the latest version, but you can switch to AzureCliCredential and this does work. 16 | We are triaging this and will work with Microsoft Support to confirm if this is a bug, or if VisualStudioCredenital has lost some support. 17 | 18 | 19 |

using Azure.Core;
using Azure.Identity;
using System.Net.Http.Headers;

// Suspend (pause) a Fabric capacity via the Azure Resource Manager REST API.
// Fails if the capacity is already paused.

// Explicitly allow the Azure CLI, Visual Studio and Azure PowerShell
// credentials in the DefaultAzureCredential chain.
DefaultAzureCredentialOptions DefaultAzureCredentialOptions = new()
{
    ExcludeAzureCliCredential = false,
    ExcludeVisualStudioCredential = false,
    ExcludeAzurePowerShellCredential = false
};
// Fill in specific information here:
string SubscriptionId = "";
string ResourceGroupName = "";
string CapacityName = "";

string CapacityUrl = $"https://management.azure.com/subscriptions/{SubscriptionId}/resourceGroups/{ResourceGroupName}/providers/Microsoft.Fabric/capacities/{CapacityName}";

var defaultAzureCredential = new DefaultAzureCredential(DefaultAzureCredentialOptions);
// Bearer token scoped to the Azure management endpoint.
string bearerToken = defaultAzureCredential.GetToken(new TokenRequestContext(new[] { "https://management.azure.com" })).Token;

// Dispose the client when the script exits.
using HttpClient client = new();

using (var request = new HttpRequestMessage(HttpMethod.Post, $"{CapacityUrl}/suspend?api-version=2022-07-01-preview"))
{
    request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", bearerToken);

    // HttpClient.Send (synchronous, .NET 5+) replaces SendAsync(...).Wait()/.Result,
    // which blocks on a Task and risks deadlock where a synchronization context exists.
    var response = client.Send(request);
    Console.WriteLine(response);
}
52 | 
53 | -------------------------------------------------------------------------------- /05_ResumeFabricCapacity/README.md: -------------------------------------------------------------------------------- 1 | ## 05_ResumeFabricCapacity 2 | Example of using Azure.Identity with HttpClient to copy a blob on OneLake. 3 | Currently tested mechanisms of authentication are AzurePowerShellCredential, VisualStudioCredential only works so far on older versions of Visual Studio components in VS 20022. 4 | 5 | 6 | Some Warnings: 7 | 1. You need to ensure that ExcludeManagedIdentityCredential is set to True if you are not using Managed Identity. 8 | Ths avoids timeouts as Azure.Identity alwasy tries Managed Identity First. 9 | 10 | 2. If doing frequent connections you need to consider caching the AccessToken. By default is valid for an hour, but re-caling the 11 | TokenRequest on each connection request can be a second or so of wasted time. 12 | 13 | 3. Make sure to change the SubscriptionId, ResourceGroupName and CapacityName to relevant values. 14 | 15 | 4. We are tracking a bug in the Visual Studio DLLs whereby VisualStudioCredential does not work for generating tokens on the latest version, but you can switch to AzureCliCredential and this does work. 16 | We are triaging this and will work with Microsoft Support to confirm if this is a bug, or if VisualStudioCredenital has lost some support. 17 | 18 | 19 |

// Resume a Microsoft Fabric capacity via the Azure Resource Manager REST API.
// Authentication is handled by the Azure.Identity DefaultAzureCredential chain.
using Azure.Core;
using Azure.Identity;
using System.Net.Http.Headers;

DefaultAzureCredentialOptions DefaultAzureCredentialOptions = new()
{
    ExcludeAzureCliCredential = false,
    ExcludeVisualStudioCredential = false,
    ExcludeAzurePowerShellCredential = false,
    // Per this sample's README (warning #1) and consistent with 02_PipelineExecute:
    // exclude Managed Identity when not running on Azure, otherwise
    // DefaultAzureCredential probes it first and can time out.
    ExcludeManagedIdentityCredential = true
};
// Fill in specific information here:
string SubscriptionId = "";
string ResourceGroupName = "";
string CapacityName = "";

// ARM resource URL of the Fabric capacity to act on.
string CapacityUrl = $"https://management.azure.com/subscriptions/{SubscriptionId}/resourceGroups/{ResourceGroupName}/providers/Microsoft.Fabric/capacities/{CapacityName}";

var defaultAzureCredential = new DefaultAzureCredential(DefaultAzureCredentialOptions);
// "/.default" is the documented scope form for ARM tokens (matches the scope
// style already used in 02_PipelineExecute).
// NOTE: the token is valid for ~1 hour; cache it if calling frequently (see README #2).
string bearerToken = defaultAzureCredential.GetToken(new TokenRequestContext(new[] { "https://management.azure.com/.default" })).Token;

using HttpClient client = new();

using (var request = new HttpRequestMessage(HttpMethod.Post, $"{CapacityUrl}/resume?api-version=2022-07-01-preview"))
{
    request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", bearerToken);

    // GetAwaiter().GetResult() blocks synchronously but rethrows the original
    // exception, unlike .Wait()/.Result which wrap it in an AggregateException.
    var response = client.SendAsync(request).GetAwaiter().GetResult();

    // Surface the HTTP outcome instead of printing the raw Task/response object.
    Console.WriteLine($"Resume request returned {(int)response.StatusCode} {response.StatusCode}");
    if (!response.IsSuccessStatusCode)
    {
        Console.WriteLine(response.Content.ReadAsStringAsync().GetAwaiter().GetResult());
    }
}
52 | 
53 | -------------------------------------------------------------------------------- /02_PipelineExecute/README.md: -------------------------------------------------------------------------------- 1 | ## 02_PipelineExecute 2 | Example of using Azure.Identity with HttpClient to execute a Fabric Pipeline. 3 | Currently tested mechanisms of authentication are AzurePowerShellCredential; VisualStudioCredential only works so far on older versions of Visual Studio components in VS 2022. 4 | 5 | The root of the url (wabi-north-europe-redirect) may need to be changed depending on your region. 6 | 7 | Some Warnings: 8 | 1. You need to ensure that ExcludeManagedIdentityCredential is set to True if you are not using Managed Identity. 9 | This avoids timeouts as Azure.Identity always tries Managed Identity first. 10 | 11 | 2. If doing frequent connections you need to consider caching the AccessToken. By default it is valid for an hour, but re-calling the 12 | TokenRequest on each connection request can be a second or so of wasted time. 13 | 14 | 3. We are tracking a bug in the Visual Studio DLLs whereby VisualStudioCredential does not work for generating tokens on the latest version, but you can switch to AzureCliCredential and this does work. 15 | We are triaging this and will work with Microsoft Support to confirm if this is a bug, or if VisualStudioCredential has lost some support. 16 | 17 |

// Execute a Microsoft Fabric pipeline via the Power BI metadata REST API.
// Authentication is handled by the Azure.Identity DefaultAzureCredential chain.
using Azure.Identity;
using Azure.Core;
using System.Net.Http.Headers;

var DefaultAzureCredentialOptions = new DefaultAzureCredentialOptions
{
    ExcludeAzureCliCredential = true,
    // Managed Identity is excluded to avoid probe timeouts when not on Azure
    // (see README warning #1).
    ExcludeManagedIdentityCredential = true,
    ExcludeSharedTokenCacheCredential = true,
    ExcludeVisualStudioCredential = false,
    ExcludeAzurePowerShellCredential = false,
    ExcludeEnvironmentCredential = true,
    ExcludeVisualStudioCodeCredential = true,
    ExcludeInteractiveBrowserCredential = true
};

// NOTE: the token is valid for ~1 hour; cache it if executing pipelines
// frequently (see README warning #2).
var accessToken = new DefaultAzureCredential(DefaultAzureCredentialOptions).GetToken(new TokenRequestContext(new[] { "https://analysis.windows.net/powerbi/api/.default" }));
string Token = accessToken.Token; // AccessToken.Token is already a string; ToString() was redundant.

// Constructs the pipeline job URL. The region prefix (wabi-north-europe-redirect)
// may need to be changed depending on your tenant's region (see README).
string pipelineId = "0987f3e1-4f93-46f9-b43b-c53dbbc13c33";
string pipelineUrl = $"https://wabi-north-europe-redirect.analysis.windows.net/metadata/artifacts/{pipelineId}/jobs/Pipeline";

using HttpClient client = new();
using (var request = new HttpRequestMessage(HttpMethod.Post, pipelineUrl))
{
    // attaches headers expected by the service
    request.Headers.Add("Accept", "application/json, text/plain, */*");
    request.Headers.Add("Accept-Encoding", "gzip, deflate, br");
    request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", Token);

    // GetAwaiter().GetResult() blocks synchronously but rethrows the original
    // exception, unlike .Wait()/.Result which wrap it in an AggregateException.
    var response = client.SendAsync(request).GetAwaiter().GetResult();

    // Surface the HTTP outcome instead of printing the raw response object.
    Console.WriteLine($"Pipeline execute returned {(int)response.StatusCode} {response.StatusCode}");
    if (!response.IsSuccessStatusCode)
    {
        Console.WriteLine(response.Content.ReadAsStringAsync().GetAwaiter().GetResult());
    }
}
54 | 
55 | 56 | -------------------------------------------------------------------------------- /11_RestartMirror/Restart-Meta.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"cell_type":"markdown","source":["### Stop and Start Fabric Mirror\n","use this to fix and replication/mirror issues on Meta SQLDB
\n","Documentation:
\n","https://learn.microsoft.com/en-us/fabric/database/sql/start-stop-mirroring-api?tabs=5dot1"],"metadata":{"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"jupyter_python"}},"id":"3887a878-1bc3-4f99-8038-2edf7ea0d89f"},{"cell_type":"code","source":["### STOP MIRROR ###\n","import sempy.fabric as fabric\n","import requests\n","\n","database_name =\"Meta\"\n","workspace_id= fabric.get_workspace_id()\n","items = fabric.FabricRestClient().get(f\"/v1/workspaces/{workspace_id}/SQLDatabases\").json()[\"value\"]\n","database=next((endpoint for endpoint in items if endpoint[\"displayName\"] == database_name))\n","database_id=database[\"id\"]\n","\n","url = f\"v1/workspaces/{workspace_id}/sqlDatabases/{database_id}/stopMirroring\"\n","r = fabric.FabricRestClient().post(url)\n","r.raise_for_status\n","\n","print (f\"Stop Command Sent to {database_name}\")\n","\n"],"outputs":[{"output_type":"display_data","data":{"application/vnd.jupyter.statement-meta+json":{"session_id":"ee2bf30e-3b6f-4be4-b4de-d68ce5294ae9","normalized_state":"finished","queued_time":"2025-12-11T18:55:08.1572122Z","session_start_time":null,"execution_start_time":"2025-12-11T18:55:08.1582251Z","execution_finish_time":"2025-12-11T18:55:09.5744834Z","parent_msg_id":"7d18495e-00b1-4843-8f50-190956bb78bf"}},"metadata":{}},{"output_type":"stream","name":"stdout","text":["Stop Command Sent to Meta\n"]}],"execution_count":17,"metadata":{"microsoft":{"language":"python","language_group":"jupyter_python"}},"id":"67210334-1b8e-43aa-baac-c72fdacf5b0b"},{"cell_type":"code","source":["### START MIRROR ###\n","import sempy.fabric as fabric\n","import requests\n","\n","database_name =\"Meta\"\n","workspace_id= fabric.get_workspace_id()\n","items = fabric.FabricRestClient().get(f\"/v1/workspaces/{workspace_id}/SQLDatabases\").json()[\"value\"]\n","database=next((endpoint for endpoint in items if endpoint[\"displayName\"] == 
database_name))\n","database_id=database[\"id\"]\n","\n","url = f\"v1/workspaces/{workspace_id}/sqlDatabases/{database_id}/startMirroring\"\n","r = fabric.FabricRestClient().post(url)\n","r.raise_for_status\n","\n","print (f\"Start Command Sent to {database_name}\")\n","\n"],"outputs":[{"output_type":"display_data","data":{"application/vnd.jupyter.statement-meta+json":{"session_id":"ee2bf30e-3b6f-4be4-b4de-d68ce5294ae9","normalized_state":"finished","queued_time":"2025-12-11T18:56:34.7472151Z","session_start_time":null,"execution_start_time":"2025-12-11T18:56:34.7482046Z","execution_finish_time":"2025-12-11T18:56:36.1537854Z","parent_msg_id":"004bfc60-9b04-4308-952b-7dad4ce5f80d"}},"metadata":{}},{"output_type":"stream","name":"stdout","text":["Start Command Sent to Meta\n"]}],"execution_count":19,"metadata":{"microsoft":{"language":"python","language_group":"jupyter_python"}},"id":"07a8724f-b7f5-48db-b7df-2b7d54e95552"}],"metadata":{"kernel_info":{"name":"jupyter","jupyter_kernel_name":"python3.11"},"kernelspec":{"name":"jupyter","display_name":"Jupyter"},"language_info":{"name":"python"},"microsoft":{"language":"python","language_group":"jupyter_python","ms_spell_check":{"ms_spell_check_language":"en"}},"nteract":{"version":"nteract-front-end@1.0.0"},"spark_compute":{"compute_id":"/trident/default","session_options":{"conf":{"spark.synapse.nbs.session.timeout":"1200000"}}},"dependencies":{}},"nbformat":4,"nbformat_minor":5} -------------------------------------------------------------------------------- /08_Struct/TestStructType7GB.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"cell_type":"code","source":["\n","import os\n","from pyspark.sql.functions import lit, input_file_name, expr\n","from pyspark.sql.types import StructType, StructField, LongType, IntegerType, ByteType, DateType, TimestampType, BooleanType, DecimalType, StringType, ShortType\n","\n","\n","\n","file_path = 
\"Files/Test/TestCSV4.csv\"\n","\n","table_schema = StructType([\\\n"," StructField(\"GUESTCHECKLINEITEMID\", LongType(), True),\\\n"," StructField(\"ORGANIZATIONID\", ByteType(), True),\\\n"," StructField(\"LOCATIONID\", IntegerType(), True),\\\n"," StructField(\"REVENUECENTERID\", LongType(), True),\\\n"," StructField(\"ORDERTYPEID\", LongType(), True),\\\n"," StructField(\"BUSINESSDATE\", DateType(), True),\\\n"," StructField(\"FIXEDPERIOD\", ByteType(), True),\\\n"," StructField(\"TRANSDATETIME\", TimestampType(), True),\\\n"," StructField(\"POSTRANSREF\", LongType(), True),\\\n"," StructField(\"SERVICEROUNDNUM\", ByteType(), True),\\\n"," StructField(\"LINENUM\", ByteType(), True),\\\n"," StructField(\"SEATNUM\", ByteType(), True),\\\n"," StructField(\"DETAILTYPE\", ByteType(), True),\\\n"," StructField(\"RECORDID\", ByteType(), True),\\\n"," StructField(\"PRICELEVEL\", ByteType(), True),\\\n"," StructField(\"UWSID\", IntegerType(), True),\\\n"," StructField(\"CHECKEMPLOYEEID\", LongType(), True),\\\n"," StructField(\"TRANSEMPLOYEEID\", LongType(), True),\\\n"," StructField(\"MANAGEREMPLOYEEID\", LongType(), True),\\\n"," StructField(\"STATUS\", StringType(), True),\\\n"," StructField(\"BINARYSTATUS\", BooleanType(), True),\\\n"," StructField(\"VOIDFLAG\", BooleanType(), True),\\\n"," StructField(\"GENFLAG1\", BooleanType(), True),\\\n"," StructField(\"REASONCODE\", ByteType(), True),\\\n"," StructField(\"LINECOUNT\", ByteType(), True),\\\n"," StructField(\"LINETOTAL\", DecimalType(), True),\\\n"," StructField(\"REPORTLINECOUNT\", ByteType(), True),\\\n"," StructField(\"REPORTLINETOTAL\", DecimalType(), True),\\\n"," StructField(\"REFERENCEINFO\", StringType(), True),\\\n"," StructField(\"MOVEFLAG\", BooleanType(), True),\\\n"," StructField(\"DONOTSHOW\", BooleanType(), True),\\\n"," StructField(\"DAYPARTID\", IntegerType(), True),\\\n"," StructField(\"PRICEOVRDEFLAG\", BooleanType(), True),\\\n"," StructField(\"TAXEXEMPTFLAG\", BooleanType(), True),\\\n"," 
StructField(\"ERRORCORRECTFLAG\", BooleanType(), True),\\\n"," StructField(\"REASONCODEID\", LongType(), True),\\\n"," StructField(\"TAX1TOTAL\", DecimalType(), True),\\\n"," StructField(\"MAJORGROUPID\", LongType(), True),\\\n"," StructField(\"FAMILYGROUPID\", LongType(), True),\\\n"," StructField(\"DTLID\", ByteType(), True),\\\n"," StructField(\"ACTIVETAXES\", StringType(), True),\\\n"," StructField(\"ADJUSTDATETIME\", TimestampType(), True),\\\n"," StructField(\"TAX1POSREF\", StringType(), True)\\\n"," ])\n","\n","\n","df = spark.read.format(\"csv\").schema(table_schema).load(file_path)\n","df.write.mode(\"overwrite\").format(\"delta\").saveAsTable(\"testcsv4ss\")\n"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"session_id":"152d1132-70fb-4f54-b05e-9462b3537040","statement_id":4,"state":"finished","livy_statement_state":"available","queued_time":"2024-01-28T15:12:58.2536048Z","session_start_time":null,"execution_start_time":"2024-01-28T15:12:58.8168711Z","execution_finish_time":"2024-01-28T15:19:08.811508Z","parent_msg_id":"e2513c7e-0b5b-4001-8952-67215a7e2a72"},"text/plain":"StatementMeta(, 152d1132-70fb-4f54-b05e-9462b3537040, 4, Finished, Available)"},"metadata":{}}],"execution_count":2,"metadata":{"advisor":{"adviceMetadata":"{\"artifactId\":\"207fa3db-593e-4ee6-874c-d702125d3793\",\"activityId\":\"152d1132-70fb-4f54-b05e-9462b3537040\",\"applicationId\":\"application_1706453980995_0001\",\"jobGroupId\":\"4\",\"advices\":{\"info\":1}}"},"cellStatus":"{\"Bob 
Duffy.\":{\"queued_time\":\"2024-01-28T15:12:58.2536048Z\",\"session_start_time\":null,\"execution_start_time\":\"2024-01-28T15:12:58.8168711Z\",\"execution_finish_time\":\"2024-01-28T15:19:08.811508Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}"},"id":"9e2c44d9-2a30-469d-9d59-aa608bdc6130"}],"metadata":{"language_info":{"name":"python"},"microsoft":{"language":"python","ms_spell_check":{"ms_spell_check_language":"en"}},"widgets":{},"kernelspec":{"name":"synapse_pyspark","language":"Python","display_name":"Synapse PySpark"},"kernel_info":{"name":"synapse_pyspark"},"nteract":{"version":"nteract-front-end@1.0.0"},"notebook_environment":{},"synapse_widget":{"version":"0.1","state":{}},"save_output":true,"spark_compute":{"compute_id":"/trident/default","session_options":{"enableDebugMode":false,"conf":{}}},"trident":{"lakehouse":{"known_lakehouses":[{"id":"19785e4d-5572-4ced-bfab-f26e7c5de3ce"}],"default_lakehouse":"19785e4d-5572-4ced-bfab-f26e7c5de3ce","default_lakehouse_name":"FabricLH","default_lakehouse_workspace_id":"9b8a6500-5ccb-49a9-885b-b5b081efed75"}}},"nbformat":4,"nbformat_minor":5} -------------------------------------------------------------------------------- /07_SharePoint/Ingest-SP.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"cell_type":"code","execution_count":1,"id":"37237e3e-00b3-4d8c-9cb4-f2c0cfc4676a","metadata":{"cellStatus":"{\"Bob 
Duffy.\":{\"queued_time\":\"2023-11-14T11:23:50.8769602Z\",\"session_start_time\":null,\"execution_start_time\":\"2023-11-14T11:24:00.2570934Z\",\"execution_finish_time\":\"2023-11-14T11:24:02.9397984Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}","jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}},"tags":["parameters"]},"outputs":[{"data":{"application/vnd.livy.statement-meta+json":{"execution_finish_time":"2023-11-14T11:24:02.9397984Z","execution_start_time":"2023-11-14T11:24:00.2570934Z","livy_statement_state":"available","parent_msg_id":"e42342a8-18a8-4ea3-8a90-1804d9cf7be9","queued_time":"2023-11-14T11:23:50.8769602Z","session_id":"6e3f0897-7362-45ce-8035-76fa2de6c546","session_start_time":null,"spark_jobs":{"jobs":[],"limit":20,"numbers":{"FAILED":0,"RUNNING":0,"SUCCEEDED":0,"UNKNOWN":0},"rule":"ALL_DESC"},"spark_pool":null,"state":"finished","statement_id":3},"text/plain":["StatementMeta(, 6e3f0897-7362-45ce-8035-76fa2de6c546, 3, Finished, Available)"]},"metadata":{},"output_type":"display_data"}],"source":["ConnectionSettings = '{\"library\": \"Unittest\", \"tenant_id\":\"xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx\",\"app_client_id\":\"app-fabricdw-dev-clientid\",\"app_client_secret\":\"app-fabricdw-dev-clientsecret\",\"keyvault\":\"kv-fabric-dev\",\"sharepoint_url\":\"prodata365.sharepoint.com\",\"site\":\"Fabric\"}'\n","SourceSettings = '{}'\n","SourceDirectory = 'tst'\n","TargetDirectory = 'unittest/AW/tst'\n","SourceObject = '*.xlsx'\n","TargetFileName = ''\n","TargetSettings = ''"]},{"cell_type":"code","execution_count":2,"id":"7afd78f6-20ac-4207-bf58-27d67caea2f4","metadata":{"cellStatus":"{\"Bob 
Duffy.\":{\"queued_time\":\"2023-11-14T11:23:50.9218891Z\",\"session_start_time\":null,\"execution_start_time\":\"2023-11-14T11:24:03.6553854Z\",\"execution_finish_time\":\"2023-11-14T11:24:06.6519076Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}","jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[{"data":{"application/vnd.livy.statement-meta+json":{"execution_finish_time":"2023-11-14T11:24:06.6519076Z","execution_start_time":"2023-11-14T11:24:03.6553854Z","livy_statement_state":"available","parent_msg_id":"5b9475e7-c090-4780-ae08-26736733adff","queued_time":"2023-11-14T11:23:50.9218891Z","session_id":"6e3f0897-7362-45ce-8035-76fa2de6c546","session_start_time":null,"spark_jobs":{"jobs":[],"limit":20,"numbers":{"FAILED":0,"RUNNING":0,"SUCCEEDED":0,"UNKNOWN":0},"rule":"ALL_DESC"},"spark_pool":null,"state":"finished","statement_id":4},"text/plain":["StatementMeta(, 6e3f0897-7362-45ce-8035-76fa2de6c546, 4, Finished, Available)"]},"metadata":{},"output_type":"display_data"}],"source":["from builtin.sharepoint import Sharepoint,AuthToken\n","import pandas\n","import json\n","from os.path import join\n","from pathlib import Path"]},{"cell_type":"code","execution_count":3,"id":"206c97e3-4b90-473a-88b2-47f3c3bb6f38","metadata":{"cellStatus":"{\"Bob 
Duffy.\":{\"queued_time\":\"2023-11-14T11:23:50.9837361Z\",\"session_start_time\":null,\"execution_start_time\":\"2023-11-14T11:24:07.3145173Z\",\"execution_finish_time\":\"2023-11-14T11:24:16.5076178Z\",\"state\":\"finished\",\"livy_statement_state\":\"available\"}}"},"outputs":[{"data":{"application/vnd.livy.statement-meta+json":{"execution_finish_time":"2023-11-14T11:24:16.5076178Z","execution_start_time":"2023-11-14T11:24:07.3145173Z","livy_statement_state":"available","parent_msg_id":"a8944e64-938f-4f15-a2da-309e92f0555b","queued_time":"2023-11-14T11:23:50.9837361Z","session_id":"6e3f0897-7362-45ce-8035-76fa2de6c546","session_start_time":null,"spark_jobs":{"jobs":[],"limit":20,"numbers":{"FAILED":0,"RUNNING":0,"SUCCEEDED":0,"UNKNOWN":0},"rule":"ALL_DESC"},"spark_pool":null,"state":"finished","statement_id":5},"text/plain":["StatementMeta(, 6e3f0897-7362-45ce-8035-76fa2de6c546, 5, Finished, Available)"]},"metadata":{},"output_type":"display_data"}],"source":["SourceSettings = SourceSettings or '{}'\n","ConnectionSettings = ConnectionSettings or '{}'\n","source_connection_options = json.loads(ConnectionSettings)\n","source_options = json.loads(SourceSettings)\n","\n","auth_token = AuthToken(**source_connection_options)\n","sharepoint = Sharepoint(auth_token, folder=SourceDirectory, file=SourceObject, **source_options, **source_connection_options)\n","\n","files = sharepoint.get_file_bytes()\n","\n","for file_name, file_bytes in files.items():\n"," Path(join(\"/lakehouse/default/Files/\",TargetDirectory)).mkdir(parents=True, exist_ok=True)\n","\n"," with open(join(\"/lakehouse/default/Files/\",TargetDirectory,file_name), \"wb\") as f:\n"," f.write(file_bytes.getbuffer())"]}],"metadata":{"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"display_name":"Synapse 
PySpark","language":"Python","name":"synapse_pyspark"},"language_info":{"name":"python"},"microsoft":{"host":{},"language":"python","ms_spell_check":{"ms_spell_check_language":"en"}},"notebook_environment":{},"nteract":{"version":"nteract-front-end@1.0.0"},"save_output":true,"spark_compute":{"compute_id":"/trident/default","session_options":{"conf":{},"enableDebugMode":false}},"synapse_widget":{"state":{},"version":"0.1"},"trident":{"lakehouse":{"default_lakehouse":"19785e4d-5572-4ced-bfab-f26e7c5de3ce","default_lakehouse_name":"FabricLH","default_lakehouse_workspace_id":"9b8a6500-5ccb-49a9-885b-b5b081efed75","known_lakehouses":[{"id":"19785e4d-5572-4ced-bfab-f26e7c5de3ce"}]}},"widgets":{}},"nbformat":4,"nbformat_minor":5} 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 
3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Mono auto generated files 17 | mono_crash.* 18 | 19 | # Build results 20 | [Dd]ebug/ 21 | [Dd]ebugPublic/ 22 | [Rr]elease/ 23 | [Rr]eleases/ 24 | x64/ 25 | x86/ 26 | [Ww][Ii][Nn]32/ 27 | [Aa][Rr][Mm]/ 28 | [Aa][Rr][Mm]64/ 29 | bld/ 30 | [Bb]in/ 31 | [Oo]bj/ 32 | [Ll]og/ 33 | [Ll]ogs/ 34 | 35 | # Visual Studio 2015/2017 cache/options directory 36 | .vs/ 37 | # Uncomment if you have tasks that create the project's static files in wwwroot 38 | #wwwroot/ 39 | 40 | # Visual Studio 2017 auto generated files 41 | Generated\ Files/ 42 | 43 | # MSTest test Results 44 | [Tt]est[Rr]esult*/ 45 | [Bb]uild[Ll]og.* 46 | 47 | # NUnit 48 | *.VisualState.xml 49 | TestResult.xml 50 | nunit-*.xml 51 | 52 | # Build Results of an ATL Project 53 | [Dd]ebugPS/ 54 | [Rr]eleasePS/ 55 | dlldata.c 56 | 57 | # Benchmark Results 58 | BenchmarkDotNet.Artifacts/ 59 | 60 | # .NET Core 61 | project.lock.json 62 | project.fragment.lock.json 63 | artifacts/ 64 | 65 | # ASP.NET Scaffolding 66 | ScaffoldingReadMe.txt 67 | 68 | # StyleCop 69 | StyleCopReport.xml 70 | 71 | # Files built by Visual Studio 72 | *_i.c 73 | *_p.c 74 | *_h.h 75 | *.ilk 76 | *.meta 77 | *.obj 78 | *.iobj 79 | *.pch 80 | *.pdb 81 | *.ipdb 82 | *.pgc 83 | *.pgd 84 | *.rsp 85 | *.sbr 86 | *.tlb 87 | *.tli 88 | *.tlh 89 | *.tmp 90 | *.tmp_proj 91 | *_wpftmp.csproj 92 | *.log 93 | *.tlog 94 | *.vspscc 95 | *.vssscc 96 | .builds 97 | *.pidb 98 | *.svclog 99 | *.scc 100 | 101 | # Chutzpah Test files 102 | _Chutzpah* 103 | 104 | # Visual C++ cache files 105 | ipch/ 106 | *.aps 107 | *.ncb 108 | *.opendb 109 | *.opensdf 110 | *.sdf 111 | *.cachefile 112 | *.VC.db 113 | *.VC.VC.opendb 114 | 115 | # Visual Studio profiler 116 | *.psess 117 | *.vsp 
118 | *.vspx 119 | *.sap 120 | 121 | # Visual Studio Trace Files 122 | *.e2e 123 | 124 | # TFS 2012 Local Workspace 125 | $tf/ 126 | 127 | # Guidance Automation Toolkit 128 | *.gpState 129 | 130 | # ReSharper is a .NET coding add-in 131 | _ReSharper*/ 132 | *.[Rr]e[Ss]harper 133 | *.DotSettings.user 134 | 135 | # TeamCity is a build add-in 136 | _TeamCity* 137 | 138 | # DotCover is a Code Coverage Tool 139 | *.dotCover 140 | 141 | # AxoCover is a Code Coverage Tool 142 | .axoCover/* 143 | !.axoCover/settings.json 144 | 145 | # Coverlet is a free, cross platform Code Coverage Tool 146 | coverage*.json 147 | coverage*.xml 148 | coverage*.info 149 | 150 | # Visual Studio code coverage results 151 | *.coverage 152 | *.coveragexml 153 | 154 | # NCrunch 155 | _NCrunch_* 156 | .*crunch*.local.xml 157 | nCrunchTemp_* 158 | 159 | # MightyMoose 160 | *.mm.* 161 | AutoTest.Net/ 162 | 163 | # Web workbench (sass) 164 | .sass-cache/ 165 | 166 | # Installshield output folder 167 | [Ee]xpress/ 168 | 169 | # DocProject is a documentation generator add-in 170 | DocProject/buildhelp/ 171 | DocProject/Help/*.HxT 172 | DocProject/Help/*.HxC 173 | DocProject/Help/*.hhc 174 | DocProject/Help/*.hhk 175 | DocProject/Help/*.hhp 176 | DocProject/Help/Html2 177 | DocProject/Help/html 178 | 179 | # Click-Once directory 180 | publish/ 181 | 182 | # Publish Web Output 183 | *.[Pp]ublish.xml 184 | *.azurePubxml 185 | # Note: Comment the next line if you want to checkin your web deploy settings, 186 | # but database connection strings (with potential passwords) will be unencrypted 187 | *.pubxml 188 | *.publishproj 189 | 190 | # Microsoft Azure Web App publish settings. 
Comment the next line if you want to 191 | # checkin your Azure Web App publish settings, but sensitive information contained 192 | # in these scripts will be unencrypted 193 | PublishScripts/ 194 | 195 | # NuGet Packages 196 | *.nupkg 197 | # NuGet Symbol Packages 198 | *.snupkg 199 | # The packages folder can be ignored because of Package Restore 200 | **/[Pp]ackages/* 201 | # except build/, which is used as an MSBuild target. 202 | !**/[Pp]ackages/build/ 203 | # Uncomment if necessary however generally it will be regenerated when needed 204 | #!**/[Pp]ackages/repositories.config 205 | # NuGet v3's project.json files produces more ignorable files 206 | *.nuget.props 207 | *.nuget.targets 208 | 209 | # Microsoft Azure Build Output 210 | csx/ 211 | *.build.csdef 212 | 213 | # Microsoft Azure Emulator 214 | ecf/ 215 | rcf/ 216 | 217 | # Windows Store app package directories and files 218 | AppPackages/ 219 | BundleArtifacts/ 220 | Package.StoreAssociation.xml 221 | _pkginfo.txt 222 | *.appx 223 | *.appxbundle 224 | *.appxupload 225 | 226 | # Visual Studio cache files 227 | # files ending in .cache can be ignored 228 | *.[Cc]ache 229 | # but keep track of directories ending in .cache 230 | !?*.[Cc]ache/ 231 | 232 | # Others 233 | ClientBin/ 234 | ~$* 235 | *~ 236 | *.dbmdl 237 | *.dbproj.schemaview 238 | *.jfm 239 | *.pfx 240 | *.publishsettings 241 | orleans.codegen.cs 242 | 243 | # Including strong name files can present a security risk 244 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 245 | #*.snk 246 | 247 | # Since there are multiple workflows, uncomment next line to ignore bower_components 248 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 249 | #bower_components/ 250 | 251 | # RIA/Silverlight projects 252 | Generated_Code/ 253 | 254 | # Backup & report files from converting an old project file 255 | # to a newer Visual Studio version. 
Backup files are not needed, 256 | # because we have git ;-) 257 | _UpgradeReport_Files/ 258 | Backup*/ 259 | UpgradeLog*.XML 260 | UpgradeLog*.htm 261 | ServiceFabricBackup/ 262 | *.rptproj.bak 263 | 264 | # SQL Server files 265 | *.mdf 266 | *.ldf 267 | *.ndf 268 | 269 | # Business Intelligence projects 270 | *.rdl.data 271 | *.bim.layout 272 | *.bim_*.settings 273 | *.rptproj.rsuser 274 | *- [Bb]ackup.rdl 275 | *- [Bb]ackup ([0-9]).rdl 276 | *- [Bb]ackup ([0-9][0-9]).rdl 277 | 278 | # Microsoft Fakes 279 | FakesAssemblies/ 280 | 281 | # GhostDoc plugin setting file 282 | *.GhostDoc.xml 283 | 284 | # Node.js Tools for Visual Studio 285 | .ntvs_analysis.dat 286 | node_modules/ 287 | 288 | # Visual Studio 6 build log 289 | *.plg 290 | 291 | # Visual Studio 6 workspace options file 292 | *.opt 293 | 294 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 295 | *.vbw 296 | 297 | # Visual Studio 6 auto-generated project file (contains which files were open etc.) 
298 | *.vbp 299 | 300 | # Visual Studio 6 workspace and project file (working project files containing files to include in project) 301 | *.dsw 302 | *.dsp 303 | 304 | # Visual Studio 6 technical files 305 | *.ncb 306 | *.aps 307 | 308 | # Visual Studio LightSwitch build output 309 | **/*.HTMLClient/GeneratedArtifacts 310 | **/*.DesktopClient/GeneratedArtifacts 311 | **/*.DesktopClient/ModelManifest.xml 312 | **/*.Server/GeneratedArtifacts 313 | **/*.Server/ModelManifest.xml 314 | _Pvt_Extensions 315 | 316 | # Paket dependency manager 317 | .paket/paket.exe 318 | paket-files/ 319 | 320 | # FAKE - F# Make 321 | .fake/ 322 | 323 | # CodeRush personal settings 324 | .cr/personal 325 | 326 | # Python Tools for Visual Studio (PTVS) 327 | __pycache__/ 328 | *.pyc 329 | 330 | # Cake - Uncomment if you are using it 331 | # tools/** 332 | # !tools/packages.config 333 | 334 | # Tabs Studio 335 | *.tss 336 | 337 | # Telerik's JustMock configuration file 338 | *.jmconfig 339 | 340 | # BizTalk build output 341 | *.btp.cs 342 | *.btm.cs 343 | *.odx.cs 344 | *.xsd.cs 345 | 346 | # OpenCover UI analysis results 347 | OpenCover/ 348 | 349 | # Azure Stream Analytics local run output 350 | ASALocalRun/ 351 | 352 | # MSBuild Binary and Structured Log 353 | *.binlog 354 | 355 | # NVidia Nsight GPU debugger configuration file 356 | *.nvuser 357 | 358 | # MFractors (Xamarin productivity tool) working folder 359 | .mfractor/ 360 | 361 | # Local History for Visual Studio 362 | .localhistory/ 363 | 364 | # Visual Studio History (VSHistory) files 365 | .vshistory/ 366 | 367 | # BeatPulse healthcheck temp database 368 | healthchecksdb 369 | 370 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 371 | MigrationBackup/ 372 | 373 | # Ionide (cross platform F# VS Code tools) working folder 374 | .ionide/ 375 | 376 | # Fody - auto-generated XML schema 377 | FodyWeavers.xsd 378 | 379 | # VS Code files for those working on multiple tools 380 | .vscode/* 381 | 
!.vscode/settings.json 382 | !.vscode/tasks.json 383 | !.vscode/launch.json 384 | !.vscode/extensions.json 385 | *.code-workspace 386 | 387 | # Local History for Visual Studio Code 388 | .history/ 389 | 390 | # Windows Installer files from build outputs 391 | *.cab 392 | *.msi 393 | *.msix 394 | *.msm 395 | *.msp 396 | 397 | # JetBrains Rider 398 | *.sln.iml 399 | -------------------------------------------------------------------------------- /07_SharePoint/builtin/sharepoint.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | import requests 3 | import pandas 4 | import fnmatch 5 | from pandas import ExcelFile, DataFrame 6 | try: 7 | from notebookutils import mssparkutils 8 | USE_MSSPARKUTILS = True 9 | except ModuleNotFoundError: 10 | USE_MSSPARKUTILS = False 11 | # while this can be in the except statement - importing modules in an except 12 | # confuses linters -_- 13 | if not USE_MSSPARKUTILS: 14 | from azure.keyvault.secrets import SecretClient 15 | from azure.identity import DefaultAzureCredential 16 | class AuthToken: 17 | """Class to retrieve token from tenant id, client id, seceret and scope. 18 | 19 | :param str token_url: https://login.microsoftonline.com/ 20 | :param str tenant_id: 21 | :param str app_client_id: The id or keyvault secret name (if keyvault URL provided). 22 | :param str app_client_secret: The id or keyvault secret name (if keyvault URL provided). 23 | :param str scope: https://graph.microsoft.com/ 24 | :param str keyvault_url: https://{key-vault-name}.vault.azure.net/ or {key-vault-name}. 
25 | """ 26 | 27 | def __init__( 28 | self, 29 | tenant_id, 30 | app_client_id, 31 | app_client_secret, 32 | scope="https://graph.microsoft.com/", 33 | keyvault: str or None = None, 34 | token_url: str or None = "https://login.microsoftonline.com/", 35 | **args 36 | ): 37 | self.access_token = None 38 | # Set the Token URL for Azure AD Endpoint 39 | if token_url is not None: 40 | self.token_url = f"{token_url}{tenant_id}/oauth2/token" 41 | else: 42 | self.token_url = ( 43 | f"https://login.microsoftonline.com/{tenant_id}/oauth2/token" 44 | ) 45 | if keyvault: 46 | if not keyvault.startswith("https://"): 47 | keyvault = f"https://{keyvault}.vault.azure.net/" 48 | if USE_MSSPARKUTILS: 49 | app_client_id = mssparkutils.credentials.getSecret( 50 | keyvault, app_client_id 51 | ) 52 | app_client_secret = mssparkutils.credentials.getSecret( 53 | keyvault, app_client_secret 54 | ) 55 | else: 56 | secret_client = SecretClient( 57 | vault_url=keyvault, credential=DefaultAzureCredential() 58 | ) 59 | app_client_id = secret_client.get_secret(app_client_id).value 60 | app_client_secret = secret_client.get_secret(app_client_secret).value 61 | 62 | self.set_token(app_client_id, app_client_secret, scope) 63 | 64 | def set_token(self, client_id, client_secret, scope): 65 | """Sets the classes token value. 
66 | :param str client_id: 67 | :param str client_secret: 68 | :param str scope: 69 | :return: None 70 | """ 71 | 72 | data = { 73 | "grant_type": "client_credentials", 74 | "client_id": client_id, 75 | "client_secret": client_secret, 76 | "resource": scope, 77 | } 78 | 79 | response = requests.post(self.token_url, data=data) 80 | response.raise_for_status() 81 | 82 | token_data = response.json() 83 | self.access_token = token_data["access_token"] 84 | class Sharepoint(): 85 | def __init__(self, auth_token : AuthToken, sharepoint_url = None, site=None, library=None, folder=None, file=None,**args): 86 | self.sharepoint_url = sharepoint_url 87 | self.site = site 88 | self.library = library 89 | self.folder = folder 90 | self.file = file 91 | self.headers = {"Authorization": f"Bearer {auth_token.access_token}"} 92 | def get_site_id_by_name(self, sharepoint_url= None, site_name = None): 93 | sharepoint_url = sharepoint_url or self.sharepoint_url 94 | site_name = site_name or self.site 95 | if not sharepoint_url: 96 | raise ValueError("sharepoint_url cannot be None or blank.") 97 | if not site_name: 98 | raise ValueError("site_name cannot be None or blank.") 99 | 100 | url = f"https://graph.microsoft.com/v1.0/sites/{sharepoint_url}:/sites/{site_name}?$select=id" 101 | 102 | headers = { 103 | 'Content-Type': 'application/x-www-form-urlencoded', 104 | **self.headers 105 | } 106 | 107 | response = requests.request("GET", url, headers=headers) 108 | response.raise_for_status() 109 | 110 | return response.json()["id"] 111 | 112 | 113 | def get_drive_id_by_name(self, site_id, library_name=None): 114 | library_name = library_name or self.library 115 | if (not site_id): 116 | raise ValueError("site_id cannot be None or blank.") 117 | if (not library_name): 118 | raise ValueError("library_name cannot be None or blank.") 119 | 120 | url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives/" 121 | 122 | headers = { 123 | 'Content-Type': 'application/x-www-form-urlencoded', 
124 | **self.headers 125 | } 126 | 127 | response = requests.request("GET", url, headers=headers) 128 | response.raise_for_status() 129 | 130 | drives = response.json()["value"] 131 | for drive in drives: 132 | if drive["name"] == library_name: 133 | return drive["id"] 134 | raise Exception("Drive name was not found.") 135 | 136 | def get_folder_id_by_name(self,site_id, drive_id, folder_name=None): 137 | folder_name = folder_name or self.folder 138 | if (not site_id): 139 | raise ValueError("site_id cannot be None or blank.") 140 | if (not drive_id): 141 | raise ValueError("drive_id cannot be None or blank.") 142 | if (not folder_name): 143 | raise ValueError("folder_name cannot be None or blank.") 144 | url = f"http://graph.microsoft.com/v1.0/sites/{site_id}/drives/{drive_id}/items/root:/{folder_name}" 145 | 146 | headers = { 147 | 'Content-Type': 'application/x-www-form-urlencoded', 148 | **self.headers 149 | } 150 | 151 | response = requests.request("GET", url, headers=headers) 152 | response.raise_for_status() 153 | 154 | return response.json()["id"] 155 | 156 | def get_file_url_by_name(self, site_id, drive_id, folder_id, file_name=None): 157 | file_name = file_name or self.file 158 | if not site_id or not drive_id or not folder_id or not file_name: 159 | raise ValueError("site_id, drive_id, folder_id, and file_name cannot be None or blank.") 160 | 161 | url = f"http://graph.microsoft.com/v1.0/sites/{site_id}/drives/{drive_id}/items/{folder_id}/children" 162 | headers = { 163 | **self.headers 164 | } 165 | 166 | response = requests.get(url, headers=headers) 167 | 168 | if response.status_code == 404: 169 | print(f"File not found: {file_name}") 170 | return {} # Return an empty dictionary if no matching files are found 171 | response.raise_for_status() 172 | 173 | files_returned = {} 174 | files = response.json()["value"] 175 | 176 | for file in files: 177 | if fnmatch.fnmatch(file["name"], file_name): 178 | files_returned[file["name"]] = 
file["@microsoft.graph.downloadUrl"] 179 | 180 | return files_returned 181 | 182 | def get_file_bytes(self, sharepoint_url:str | None =None, site_name:str | None=None,\ 183 | library_name:str | None=None, folder_name:str | None=None, file_name:str | None=None) -> dict[str,BytesIO]: 184 | sharepoint_url = sharepoint_url or self.sharepoint_url 185 | site_name = site_name or self.site 186 | library_name = library_name or self.library 187 | folder_name = folder_name or self.folder 188 | file_name = file_name or self.file 189 | 190 | site_id = self.get_site_id_by_name(sharepoint_url, site_name) 191 | drive_id = self.get_drive_id_by_name(site_id, library_name) 192 | folder_id = self.get_folder_id_by_name(site_id, drive_id, folder_name) 193 | file_url = self.get_file_url_by_name(site_id,drive_id,folder_id, file_name) 194 | 195 | file_return = {} 196 | for file, url in file_url.items(): 197 | response = requests.request("GET", url=file_url[file]) 198 | if response.status_code == 404: 199 | print(f"File not found: {file}") 200 | continue # Skip this file if it's not found 201 | response.raise_for_status() 202 | 203 | file_return[file] = BytesIO(response.content) 204 | return file_return 205 | 206 | def get_excel_file(self, sharepoint_url:str | None =None, site_name:str | None=None,\ 207 | library_name:str | None=None, folder_name:str | None=None, file_name: str | None = None) -> ExcelFile: 208 | sharepoint_url = sharepoint_url or self.sharepoint_url 209 | site_name = site_name or self.site 210 | library_name = library_name or self.library 211 | folder_name = folder_name or self.folder 212 | file_name = file_name or self.file 213 | if not file_name: raise Exception("Filename cannot be none.") 214 | if '*' in file_name or "%" in file_name: 215 | raise Exception("Wildcard name not supported for excel files.") 216 | file = self.get_file_bytes(sharepoint_url,site_name,library_name,folder_name, file_name) 217 | 218 | return ExcelFile(file[file_name]) 219 | def 
df_from_excel(excel_file : ExcelFile, sheet_name): 220 | if not sheet_name: # checks if empty or None 221 | yield (excel_file.sheet_names[0],pandas.read_excel(excel_file)) 222 | elif sheet_name == "*": 223 | for sheet in excel_file.sheet_names: 224 | yield (sheet,pandas.read_excel(excel_file, sheet_name=sheet)) 225 | else: 226 | for sheet in sheet_name.split(","): 227 | yield (sheet,pandas.read_excel(excel_file, sheet_name=sheet)) 228 | 229 | if __name__ == "__main__": 230 | import pandas 231 | import json 232 | from os.path import join 233 | from pathlib import Path 234 | 235 | # Environment parameters 236 | SourceConnectionSettings='{"tenant_id":"d8ca992a-5fbe-40b2-9b8b-844e198c4c94","app_client_id":"app-fabricdw-dev-clientid", "app_client_secret":"app-fabricdw-dev-clientsecret","keyvault":"kv-fabric-dev" ,"sharepoint_url":"prodata365.sharepoint.com","site" : "Fabric"}' 237 | # Source Settings 238 | SourceSettings = '{"library":"Unittest","sharepoint_url":"prodata365.sharepoint.com","site":"Fabric"}' 239 | # Pipeline Parameters 240 | SourceDirectory = "EmptyFolder" 241 | SourceObject = "*" 242 | TargetDirectory = "landing/erp" 243 | TargetFileName = "" 244 | 245 | source_connection_options = json.loads(SourceConnectionSettings) 246 | 247 | source_options = json.loads(SourceSettings) 248 | 249 | auth_token = AuthToken(**source_connection_options) 250 | sharepoint = Sharepoint(auth_token, folder=SourceDirectory, file=SourceObject, **source_options) 251 | 252 | files = sharepoint.get_file_bytes() 253 | 254 | for file_name, file_bytes in files.items(): 255 | Path(join("/lakehouse/default/Files/",TargetDirectory)).mkdir(parents=True, exist_ok=True) 256 | 257 | with open(join("/lakehouse/default/Files/",TargetDirectory,file_name), "wb") as f: 258 | f.write(file_bytes.getbuffer()) 259 | -------------------------------------------------------------------------------- /06_RefreshPowerBIDataset/pbi_refresh.py: 
-------------------------------------------------------------------------------- 1 | """Tools used to get workspace, dataset names and refresh datasets. 2 | 3 | Power BI tools to refresh a dataset using client secret authentication. 4 | 5 | Sample use (client id and secret values directly): 6 | tenant_id = "xxxxxxxx-5fbe-40b2-xxxx-xxxx198c4c94" # replace with your azure tenant id 7 | app_client_id = "XXXXXXXX-b37a-41ed-xxxx-xxxx558e66b3" 8 | app_client_secret = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" 9 | app_scope = "https://analysis.windows.net/powerbi/api" 10 | auth_token = AuthToken(tenant_id, app_client_id, app_app_client_secret, app_scope) 11 | 12 | workspace_name = "FabricDWUnitTests" # choose whichever workspace is applicable 13 | pbi_refresh = PowerBIRefresh(workspace_name, auth_token) 14 | 15 | dataset_name = "SampleDataset" 16 | pbi_refresh.refresh(dataset_name) 17 | 18 | Sample use (keyvault): 19 | tenant_id = "xxxxxxxx-5fbe-40b2-xxxx-xxxx198c4c94" # replace with your azure tenant id 20 | app_client_id_secretname = "fabricDW-app-client-id" 21 | app_client_secret_secretname = "fabricDW-app-client-secret" 22 | auth_token = AuthToken(tenant_id, app_client_id_secretname, app_client_secret_secretname) 23 | 24 | workspace_name = "FabricDWUnitTests" # choose whichever workspace is applicable 25 | pbi_refresh = PowerBIRefresh(workspace_name, auth_token) 26 | 27 | dataset_name = "SampleDataset" 28 | pbi_refresh.refresh(dataset_name) 29 | """ 30 | import time 31 | from os.path import join 32 | import requests 33 | 34 | try: 35 | from notebookutils import mssparkutils 36 | USE_MSSPARKUTILS = True 37 | except ModuleNotFoundError: 38 | USE_MSSPARKUTILS = False 39 | # while this can be in the except statement - importing modules in an except 40 | # confuses linters -_- 41 | if not USE_MSSPARKUTILS: 42 | from azure.keyvault.secrets import SecretClient 43 | from azure.identity import DefaultAzureCredential 44 | READ_STATUS_TIMER = 5 45 | REST_TIMEOUT = 10 46 | 47 | 
48 | class AuthToken: 49 | """Class to retrieve token from tenant id, client id, seceret and scope. 50 | 51 | :param str token_url: https://login.microsoftonline.com/ 52 | :param str tenant_id: 53 | :param str app_client_id: The id or keyvault secret name (if keyvault URL provided). 54 | :param str app_client_secret: The id or keyvault secret name (if keyvault URL provided). 55 | :param str scope: https://analysis.windows.net/powerbi/api 56 | :param str keyvault_url: https://{key-vault-name}.vault.azure.net/ or {key-vault-name}. 57 | """ 58 | 59 | def __init__( 60 | self, 61 | tenant_id, 62 | app_client_id, 63 | app_client_secret, 64 | scope="https://analysis.windows.net/powerbi/api", 65 | keyvault: str or None = None, 66 | token_url: str or None = "https://login.microsoftonline.com/", 67 | ): 68 | self.access_token = None 69 | # Set the Token URL for Azure AD Endpoint 70 | if token_url is not None: 71 | self.token_url = f"{token_url}{tenant_id}/oauth2/token" 72 | else: 73 | self.token_url = ( 74 | f"https://login.microsoftonline.com/{tenant_id}/oauth2/token" 75 | ) 76 | if keyvault: 77 | if not keyvault.startswith("https://"): 78 | keyvault = f"https://{keyvault}.vault.azure.net/" 79 | if USE_MSSPARKUTILS: 80 | app_client_id = mssparkutils.credentials.getSecret( 81 | keyvault, app_client_id 82 | ) 83 | app_client_secret = mssparkutils.credentials.getSecret( 84 | keyvault, app_client_secret 85 | ) 86 | else: 87 | secret_client = SecretClient( 88 | vault_url=keyvault, credential=DefaultAzureCredential() 89 | ) 90 | app_client_id = secret_client.get_secret(app_client_id).value 91 | app_client_secret = secret_client.get_secret(app_client_secret).value 92 | 93 | self.set_token(app_client_id, app_client_secret, scope) 94 | 95 | def set_token(self, client_id, client_secret, scope): 96 | """Sets the classes token value. 
97 | :param str client_id: 98 | :param str client_secret: 99 | :param str scope: 100 | :return: None 101 | """ 102 | data = { 103 | "grant_type": "client_credentials", 104 | "client_id": client_id, 105 | "client_secret": client_secret, 106 | "resource": scope, 107 | } 108 | 109 | # Send POS request to obtain access token 110 | response = requests.post(self.token_url, data=data, timeout=REST_TIMEOUT) 111 | 112 | response.raise_for_status() 113 | 114 | token_data = response.json() 115 | self.access_token = token_data["access_token"] 116 | 117 | 118 | class PowerBIRefresh: 119 | """Class of tools to handle power BI refreshing. 120 | :param str base_url: https://api.powerbi.com/v1.0/myorg/ 121 | :param AuthToken auth_token: 122 | :param workspace_name: 123 | """ 124 | 125 | def __init__( 126 | self, 127 | workspace_name, 128 | auth_token: AuthToken or str, 129 | base_url: str or None = "https://api.powerbi.com/v1.0/myorg/", 130 | ): 131 | 132 | if isinstance(auth_token,str): 133 | self.headers = {"Authorization": f"Bearer {auth_token}"} 134 | elif isinstance(auth_token,AuthToken): 135 | self.headers = {"Authorization": f"Bearer {auth_token.access_token}"} 136 | 137 | self.base_url = base_url 138 | 139 | if isinstance(workspace_name, str): 140 | self.workspace_id = self.get_workspace_id(workspace_name) 141 | elif isinstance(workspace_name, list): 142 | self.workspace_id = self.get_workspace_id(workspace_name[0]) 143 | 144 | def get_workspace_id(self, workspace_name) -> str: 145 | """Returns a workspace name. 146 | 147 | :param str workspace_name: 148 | :raises WorkspaceNameNotFoundException: 149 | :return: Id of workspace. 
150 | :rtype: str 151 | """ 152 | relative_url = join(self.base_url, "groups") 153 | 154 | response = requests.get( 155 | relative_url, headers=self.headers, timeout=REST_TIMEOUT 156 | ) 157 | if not response.ok: 158 | response.raise_for_status() 159 | 160 | workspaces = response.json()["value"] 161 | 162 | for workspace in workspaces: 163 | if workspace["name"] == workspace_name: 164 | self.workspace_id = workspace["id"] 165 | return self.workspace_id 166 | 167 | raise WorkspaceNameNotFoundException(workspace_name) 168 | 169 | def get_dataset_ids(self, dataset_names, workspace_id=None) -> list: 170 | """Returns a list of dataset ids from a list of dataset names. 171 | 172 | :param list(str) dataset_names: 173 | :param workspace_id: 174 | :type workspace_id: str or None 175 | :raises DatasetNameNotFoundException: 176 | :return: list of dataset ids 177 | :rtype: list 178 | """ 179 | workspace_id = self.workspace_id if workspace_id is None else workspace_id 180 | relative_url = join(self.base_url, f"groups/{workspace_id}/datasets") 181 | 182 | # Set the GET response using the relative URL 183 | response = requests.get( 184 | relative_url, headers=self.headers, timeout=REST_TIMEOUT 185 | ) 186 | 187 | if not response.ok: 188 | response.raise_for_status() 189 | 190 | dataset_ids = [] 191 | datasets = response.json()["value"] 192 | 193 | for dataset in datasets: 194 | for dataset_name in dataset_names: 195 | if dataset["name"] == dataset_name and dataset["isRefreshable"] is True: 196 | dataset_ids.append(dataset["id"]) 197 | return dataset_ids 198 | 199 | raise DatasetNameNotFoundException(dataset_names) 200 | 201 | def get_dataset_name(self, dataset_id, workspace_id=None) -> str: 202 | """Returns a datasetname from its id. 
203 | 204 | :param str dataset_id: 205 | :param workspace_id: 206 | :type workspace_id: str or None 207 | :raises DatasetNameNotFoundException: 208 | :return: dataset id 209 | :rtype: str 210 | """ 211 | workspace_id = self.workspace_id if workspace_id is None else workspace_id 212 | relative_url = join(self.base_url, f"groups/{workspace_id}/datasets") 213 | response = requests.get( 214 | relative_url, headers=self.headers, timeout=REST_TIMEOUT 215 | ) 216 | 217 | if not response.ok: 218 | response.raise_for_status() 219 | 220 | datasets = response.json()["value"] 221 | for dataset in datasets: 222 | if dataset["id"] != dataset_id: 223 | pass 224 | if dataset["isRefreshable"] is True: 225 | return dataset["name"] 226 | raise DatasetNameNotFoundException(dataset_id) 227 | 228 | def refresh_dataset(self, dataset_id, workspace_id=None): 229 | """Refreshes a dataset by id. 230 | 231 | :param str dataset_id: 232 | :param workspace_id: 233 | :type workspace_id: str or None 234 | :raises DatasetRefreshFailedException: 235 | :return: None 236 | :rtype: None 237 | """ 238 | workspace_id = self.workspace_id if workspace_id is None else workspace_id 239 | relative_url = join( 240 | self.base_url, f"groups/{workspace_id}/datasets/{dataset_id}/refreshes" 241 | ) 242 | response = requests.post( 243 | relative_url, headers=self.headers, timeout=REST_TIMEOUT 244 | ) 245 | 246 | if response.ok: 247 | error_counter = 0 248 | error_limit = 5 249 | status = None 250 | 251 | print( 252 | f"Dataset {self.get_dataset_name(dataset_id, workspace_id)} refresh has been triggered successfully." 
253 | ) 254 | 255 | while error_counter < error_limit or status == "Unknown": 256 | try: 257 | status = self.get_dataset_refresh_status(dataset_id, workspace_id) 258 | except requests.HTTPError: 259 | error_counter += 1 260 | time.sleep(READ_STATUS_TIMER) 261 | continue 262 | 263 | if status == "Failed": 264 | raise DatasetRefreshFailedException(self, workspace_id, dataset_id) 265 | if status == "Completed": 266 | error_counter = 0 267 | break 268 | 269 | if error_counter > error_limit: 270 | raise FailedToGetStatusException( 271 | workspace_id, dataset_id, error_counter 272 | ) 273 | else: 274 | print( 275 | f"Failed to trigger dataset" 276 | f"{self.get_dataset_name(dataset_id, workspace_id)} refresh." 277 | ) 278 | print("Response status code:", response.status_code) 279 | print("Response content:", response.content) 280 | response.raise_for_status() 281 | raise DatasetRefreshFailedException(self, workspace_id, dataset_id) 282 | 283 | def get_dataset_refresh_status(self, dataset_id, workspace_id=None) -> str: 284 | """Gets the refresh status of a dataset by its dataset id. 285 | 286 | :param str dataset_id: 287 | :param workspace_id: 288 | :type workspace_id: str or None 289 | :return: The status of dataset refresh (current or previous). 290 | :rtype: str 291 | """ 292 | workspace_id = self.workspace_id if workspace_id is None else workspace_id 293 | relative_url = join( 294 | self.base_url, 295 | f"groups/{workspace_id}/datasets/{dataset_id}/refreshes?$top=1", 296 | ) 297 | response = requests.get( 298 | relative_url, headers=self.headers, timeout=REST_TIMEOUT 299 | ) 300 | response.raise_for_status() 301 | refresh_status = response.json()["value"] 302 | status = refresh_status[0]["status"] 303 | return status 304 | 305 | def refresh( 306 | self, 307 | dataset_names: str or list(str), 308 | workspace_names: str or list(str) or None = None, 309 | ): 310 | """Invokes refresh of PowerBI Dataset, can be a list of workspaces and datasets or just one. 
311 | 312 | :param workspace_names: List or comma seperated string of workspace names. 313 | :type workspace_names: str or list(str) or None 314 | :param dataset_names: List or comma seperated string of dataset names. 315 | :type dataset_names: str or list(str) 316 | :return: None. 317 | :rtype: None 318 | """ 319 | if isinstance(workspace_names, list): 320 | workspace_list = workspace_names 321 | elif workspace_names is None: 322 | workspace_list = [self.workspace_id] 323 | else: 324 | workspace_list = workspace_names.split(",") 325 | 326 | if dataset_names is None: 327 | raise DatasetNameBlankException() 328 | else: 329 | if isinstance(dataset_names, list): 330 | dataset_list = dataset_names 331 | else: 332 | dataset_list = dataset_names.split(",") 333 | 334 | for workspace_name in workspace_list: 335 | workspace_id = ( 336 | self.workspace_id 337 | if workspace_name == self.workspace_id 338 | else self.get_workspace_id(workspace_name) 339 | ) 340 | dataset_ids = self.get_dataset_ids(dataset_list, workspace_id) 341 | for dataset_id in dataset_ids: 342 | self.refresh_dataset(dataset_id, workspace_id) 343 | 344 | 345 | class WorkspaceNameNotFoundException(Exception): 346 | """Workspace name not found runtime exception.""" 347 | 348 | def __init__(self, workspace_name): 349 | message = f"workspace {workspace_name} Not Found" 350 | super().__init__(message) 351 | 352 | 353 | class DatasetNameNotFoundException(Exception): 354 | """Dataset name not found runtime exception.""" 355 | 356 | def __init__(self, dataset_name): 357 | message = f"Dataset Name {dataset_name} Not Found" 358 | super().__init__(message) 359 | 360 | 361 | class DatasetNameBlankException(Exception): 362 | """Dataset name was blank.""" 363 | 364 | def __init__(self): 365 | message = "Dataset Name cannot be blank" 366 | super().__init__(message) 367 | 368 | 369 | class DatasetRefreshFailedException(Exception): 370 | """Refresh of dataset was not successful""" 371 | 372 | def __init__(self, 
pbi_refr_tools, workspace_id, dataset_id): 373 | workspace_name = "" 374 | message = ( 375 | f"Dataset {pbi_refr_tools.get_pbi_dataset_name(dataset_id)} ({dataset_id})" 376 | + f"in workspace {workspace_name} ({workspace_id}) failed to refresh." 377 | ) 378 | super().__init__(message) 379 | 380 | 381 | class FailedToGetStatusException(Exception): 382 | """Failed to get status during refresh""" 383 | 384 | def __init__(self, workspace, dataset, retries): 385 | message = f"Dataset {dataset} in {workspace} failed to get status, after {retries} retries." 386 | super().__init__(message) 387 | 388 | 389 | if __name__ == "__main__": 390 | TENANT_ID = ( 391 | "xxxxxxxx-5fbe-xxxx-xxxx-xxxxxxxxxxxxx" # replace with your azure tenant id 392 | ) 393 | APP_CLIENT_ID = "" 394 | APP_CLIENT_SECRET = "" 395 | AUTH_TOKEN = AuthToken( 396 | TENANT_ID, APP_CLIENT_ID, APP_CLIENT_SECRET, keyvault="" 397 | ) 398 | 399 | WORKSPACE_NAME = "" # choose whichever workspace is applicable 400 | PBI_REFRESH = PowerBIRefresh(WORKSPACE_NAME, AUTH_TOKEN) 401 | DATASET_NAME = "" 402 | PBI_REFRESH.refresh(DATASET_NAME) 403 | --------------------------------------------------------------------------------