├── media ├── AADCredPassthru.png ├── ADBMount.png ├── ADBNotebook.png ├── AccessToMultipleDataset.png ├── AccessToSingleDataset.png ├── AccessViaSP.png ├── ClusterScopedServicePrincipal.png ├── Comparison.png ├── DNSFlow.png ├── NetworkFlow.png ├── PermissionByWorkspace.png ├── PrivateDNSIntegration.png ├── PrivateEndpoint2.png ├── Readme.md ├── SessionScopedServicePrincipal.png ├── SingleUserAccess.png ├── VNetPeering.png ├── firewall.png ├── firewall2.tif ├── notebook2.tif ├── privateendpoint.png ├── vnetlinked.png └── vnetlinked3.png ├── readme.md └── tutorials ├── pattern_3_AAD_Credential_passthrough ├── Pattern3.md ├── media │ ├── pattern_3_step_1.png │ ├── pattern_3_step_2.png │ ├── pattern_3_step_3.png │ ├── pattern_3_step_4.png │ ├── pattern_3_step_5.png │ ├── pattern_3_step_6.png │ ├── pattern_3_step_6_a.png │ ├── pattern_3_step_7.png │ ├── pattern_3_step_7_a.png │ ├── pattern_3_step_8.png │ └── pattern_3_step_9.png └── notebooks │ ├── testuser1 │ └── pattern3-AADPassthrough-group1.ipynb │ └── testuser2 │ └── pattern3-AADPassthrough-group2.ipynb ├── pattern_4_Cluster_scoped_Service_Principal ├── Pattern4.md ├── media │ ├── pattern_4_step_0.png │ ├── pattern_4_step_1.png │ ├── pattern_4_step_2.png │ ├── pattern_4_step_3.png │ ├── pattern_4_step_4.png │ ├── pattern_4_step_4_a.png │ ├── pattern_4_step_5.png │ ├── pattern_4_step_5_a.png │ ├── pattern_4_step_6.png │ └── pattern_4_step_7.png └── notebooks │ ├── testuser1 │ └── pattern4-Cluster-scoped-principals-iot_devices.ipynb │ └── testuser2 │ └── pattern4-Cluster-scoped-principals-loans.ipynb ├── pattern_6_Databricks_Table_Access_Control ├── Pattern6.md ├── media │ ├── pattern_6_step_1.png │ ├── pattern_6_step_10.png │ ├── pattern_6_step_10_a.png │ ├── pattern_6_step_1_a.png │ ├── pattern_6_step_2.png │ ├── pattern_6_step_3.png │ ├── pattern_6_step_4.png │ ├── pattern_6_step_5.png │ ├── pattern_6_step_5_a.png │ ├── pattern_6_step_6.png │ ├── pattern_6_step_7.png │ ├── pattern_6_step_8.png │ ├── pattern_6_step_8_a.png │ ├── pattern_6_step_8_b.png │ ├── pattern_6_step_8_c.png │ ├── pattern_6_step_9.png │ └── pattern_6_step_9_a.png └── notebooks │ ├── testuser1 │ └── pattern6-table-access-control.ipynb │ └── testuser2 │ └── pattern6-table-access-control.ipynb └── preparation ├── Preparation.md ├── media ├── preparation_step_1.png ├── preparation_step_2.png ├── preparation_step_2_a.png ├── preparation_step_2_b.png ├── preparation_step_3.png ├── preparation_step_4.png ├── preparation_step_5.png ├── preparation_step_6.png ├── preparation_step_6_a.png ├── preparation_step_6_b.png ├── preparation_step_7.png ├── preparation_step_8.png └── preparation_step_9.png └── notebooks └── Preparation.ipynb /media/AADCredPassthru.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/media/AADCredPassthru.png -------------------------------------------------------------------------------- /media/ADBMount.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/media/ADBMount.png -------------------------------------------------------------------------------- /media/ADBNotebook.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/media/ADBNotebook.png -------------------------------------------------------------------------------- /media/AccessToMultipleDataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/media/AccessToMultipleDataset.png -------------------------------------------------------------------------------- /media/AccessToSingleDataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/media/AccessToSingleDataset.png -------------------------------------------------------------------------------- /media/AccessViaSP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/media/AccessViaSP.png -------------------------------------------------------------------------------- /media/ClusterScopedServicePrincipal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/media/ClusterScopedServicePrincipal.png -------------------------------------------------------------------------------- /media/Comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/media/Comparison.png -------------------------------------------------------------------------------- /media/DNSFlow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/media/DNSFlow.png -------------------------------------------------------------------------------- /media/NetworkFlow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/media/NetworkFlow.png -------------------------------------------------------------------------------- /media/PermissionByWorkspace.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/media/PermissionByWorkspace.png -------------------------------------------------------------------------------- /media/PrivateDNSIntegration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/media/PrivateDNSIntegration.png -------------------------------------------------------------------------------- /media/PrivateEndpoint2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/media/PrivateEndpoint2.png -------------------------------------------------------------------------------- /media/Readme.md: -------------------------------------------------------------------------------- 1 | Placeholder 2 | -------------------------------------------------------------------------------- /media/SessionScopedServicePrincipal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/media/SessionScopedServicePrincipal.png -------------------------------------------------------------------------------- /media/SingleUserAccess.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/media/SingleUserAccess.png -------------------------------------------------------------------------------- /media/VNetPeering.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/media/VNetPeering.png -------------------------------------------------------------------------------- /media/firewall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/media/firewall.png -------------------------------------------------------------------------------- /media/firewall2.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/media/firewall2.tif -------------------------------------------------------------------------------- /media/notebook2.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/media/notebook2.tif -------------------------------------------------------------------------------- /media/privateendpoint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/media/privateendpoint.png -------------------------------------------------------------------------------- /media/vnetlinked.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/media/vnetlinked.png -------------------------------------------------------------------------------- /media/vnetlinked3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/media/vnetlinked3.png -------------------------------------------------------------------------------- /readme.md: 
-------------------------------------------------------------------------------- 1 | # Securing access to Azure Data Lake Gen 2 from Azure Databricks 2 | 3 | **Summary:** 4 | 5 | This document provides guidance and approaches to securing access and connectivity to data in Azure Data Lake Storage from Databricks. 6 | 7 | **Versions:** 8 | 9 | | **Name** | **Title** | **Notes** | **Date** | 10 | | --- | --- | --- | --- | 11 | | Nicholas Hurt | Microsoft Cloud Solution Architect – Data & AI | Original | 20 Jan 2020 | 12 | | Nicholas Hurt, Wasim Ahmad | Microsoft Cloud Solution Architect – Data & AI | Added section on securely accessing ADLS | 03 Aug 2020 | 13 | | Anil Sener | Microsoft Cloud Solution Architect – Data & AI | Added tutorials for pattern 3, 4 and 6. | 01 December 2021 | 14 | | | | | | 15 | 16 | # Contents 17 | 18 | [Introduction](#Introduction) 19 | 20 | [Securing connectivity to ADLS](#securing-connectivity-to-ADLS) 21 | 22 | [Pattern 1 - Access via Service Principal](#Pattern-1---Access-via-Service-Principal) 23 | 24 | [Pattern 2 - Multiple workspaces — permission by workspace](#Pattern-2---Multiple-workspaces---permission-by-workspace) 25 | 26 | [Pattern 3 - AAD Credential passthrough](#Pattern-3---AAD-Credential-passthrough) 27 | 28 | [Pattern 4 - Cluster scoped Service Principal](#Pattern-4---Cluster-scoped-Service-Principal) 29 | 30 | [Pattern 5 - Session scoped Service Principal](#Pattern-5---Session-scoped-Service-Principal) 31 | 32 | [Pattern 6 - Databricks Table Access Control](#Pattern-6---Databricks-Table-Access-Control) 33 | 34 | [Conclusion](#Conclusion) 35 | 36 | [License/Terms of Use](#License/Terms-of-Use) 37 | 38 | 39 | ## Introduction 40 | 41 | There are a number of considerations when configuring access to Azure Data Lake Storage Gen2 (ADLS) from Azure Databricks (ADB). How will Databricks users connect to the lake securely, and how does one configure access control based on identity? This article aims to provide an overview of [network security](https://docs.microsoft.com/en-us/azure/security/fundamentals/network-best-practices) between these two services as well as an in-depth look at six [access control](https://docs.microsoft.com/en-us/azure/security/fundamentals/identity-management-best-practices) patterns, the advantages and disadvantages of each, and the scenarios in which they would be most appropriate. ADLS in the context of this article can be considered a v2 storage account with Hierarchical Namespace (HNS) enabled. 42 | 43 | ADLS offers more granular security than RBAC through the use of access control lists (ACLs), which can be applied at folder or file level. As per [best practice](https://docs.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-best-practices#use-security-groups-versus-individual-users) these should be assigned to AAD groups rather than individual users or service principals, for two main reasons: i.) changing ACLs can take time to propagate if there are thousands of files, and ii.) there is a limit of 32 ACL entries per file or folder. Additionally, nesting groups (groups within groups) can offer even more agility and flexibility as permissions evolve. Understanding access control using RBAC and ACLs is outside the scope of this document but is covered [here](https://github.com/hurtn/datalake-on-ADLS/blob/master/Understanding%20access%20control%20and%20data%20lake%20configurations%20in%20ADLS%20Gen2.md).
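To make the group-based ACL approach concrete, below is a minimal sketch using the `azure-storage-file-datalake` Python SDK to grant a readers group read and execute permissions (including matching default ACLs) on a folder. The account, container, folder path and group object ID are hypothetical placeholders, and the identity running the code is assumed to have sufficient rights (for example Storage Blob Data Owner); adapt the names to your environment.

```python
# Sketch only: grant an AAD group r-x ACLs (and default ACLs) on a folder.
# All names and IDs below are hypothetical placeholders.
from azure.identity import ClientSecretCredential
from azure.storage.filedatalake import DataLakeServiceClient

credential = ClientSecretCredential(
    tenant_id="<tenant-id>",
    client_id="<automation-sp-application-id>",
    client_secret="<automation-sp-secret>",
)

service = DataLakeServiceClient(
    account_url="https://<storage-account-name>.dfs.core.windows.net",
    credential=credential,
)

directory = service.get_file_system_client("<file-system-name>") \
                   .get_directory_client("raw/project-a")

# r-x on existing items plus a default entry so new children inherit it.
readers_oid = "<readers-aad-group-object-id>"
acl = f"group:{readers_oid}:r-x,default:group:{readers_oid}:r-x"
directory.update_access_control_recursive(acl=acl)
```

Because the ACL entry references a group rather than individual identities, onboarding or offboarding a user only requires a change to the group membership, not to the lake itself.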
44 | 45 | By way of a very simple example, a data lake may require two sets of permissions - engineers who run data pipelines and transformations requiring read-write access to a particular set of folders, and analysts who consume [read-only] curated analytics from another. At a minimum, two AAD security groups should be created to represent this division of responsibilities, namely a readers group and a writers group. Additional groups to represent the teams or business units could be nested inside these groups and the individuals added to their respective team group. The required permissions for the readers and writers groups to specific folders could be controlled using ACLs. Please see [the documentation](https://docs.microsoft.com/en-gb/azure/storage/blobs/data-lake-storage-access-control#access-control-lists-on-files-and-directories) for further details. For automated jobs, a [service principal](https://docs.microsoft.com/en-us/azure/active-directory/develop/app-objects-and-service-principals#service-principal-object) which has been added to the appropriate group should be used, instead of an individual user identity. Service principal credentials should be kept extremely secure and referenced only through [secrets](https://docs.microsoft.com/en-us/azure/databricks/security/secrets/). 46 | 47 | ## Securing connectivity to ADLS 48 | In Azure there are two types of PaaS service – those which are built using dedicated architecture, known as dedicated services, and those which are built using a shared architecture, known as shared services. Dedicated services use a mix of cloud resources (compute, storage, network) allocated from a pool, and are assigned to a dedicated instance of that service for a particular customer. These can be deployed within a customer virtual network, for example, a virtual machine. Shared services use a set of cloud resources which are assigned to more than one instance of a service, utilised by more than one customer, and therefore cannot be deployed within a single customer network, e.g., storage. Depending on the type of service, a different [VNet integration pattern](https://github.com/fguerri/AzureVNetIntegrationPatterns) is applied to make it accessible only from clients deployed within Azure VNets and not accessible from the internet. 49 | Azure Storage / ADLS gen2 is a shared service built using a shared architecture, and so to access it securely from Azure Databricks there are two options available. This Databricks [blog](https://databricks.com/blog/2020/02/28/securely-accessing-azure-data-sources-from-azure-databricks.html#:~:text=%20Securely%20Accessing%20Azure%20Data%20Sources%20from%20Azure,available%20to%20access%20Azure%20data%20services...%20More%20) summarises the following approaches: 50 | 51 | 1. [Service Endpoints](https://docs.microsoft.com/en-us/azure/virtual-network/virtual-network-service-endpoints-overview#key-benefits) 52 | 2. [Azure Private Link](https://docs.microsoft.com/en-us/azure/private-link/private-link-overview#key-benefits) 53 | 54 | Customers may use either approach for securing access between ADB and ADLS Gen2, but both require the ADB workspace to be [VNET injected](https://docs.microsoft.com/en-us/azure/databricks/administration-guide/cloud-configurations/azure/vnet-inject). 
55 | 56 | ### Service Endpoints 57 | The [documentation](https://docs.microsoft.com/en-us/azure/storage/common/storage-network-security) explains how to configure service endpoints, and how to limit access to the storage account by configuring the storage firewall. The storage account can be further secured against data exfiltration by using a [service endpoint policy](https://docs.microsoft.com/en-us/azure/virtual-network/virtual-network-service-endpoint-policies-overview). 58 | 59 | ### Private Link 60 | 61 | The setup of storage service endpoints is less complicated than that of Private Link; however, Private Link is widely regarded as the most secure approach and indeed the recommended mechanism for securely connecting to ADLS Gen2 from Azure Databricks. It exposes the PaaS shared service (storage) via a private IP and thus overcomes the limitations of service endpoints and protects against data exfiltration __by default__. The setup of Private Link requires a number of configurations at the network and DNS level, and most of the complexity encountered is around DNS resolution to the service. The following [article](https://github.com/dmauser/PrivateLink/tree/master/DNS-Integration-Scenarios) goes into greater detail on DNS considerations and integration scenarios. The approach discussed below is to use Azure Private DNS Zones to host the “privatelink” zone. 62 | 63 | ### Connecting securely to ADLS from ADB 64 | 65 | The following steps will enable Azure Databricks to connect privately and securely with Azure Storage via private endpoint using a [hub and spoke](https://docs.microsoft.com/en-us/azure/architecture/reference-architectures/hybrid-networking/hub-spoke) configuration, i.e., ADB and the private endpoints are in their respective spoke VNETs: 66 | 1. Deploy Azure Databricks into a VNet using the [Portal](https://docs.microsoft.com/en-us/azure/databricks/administration-guide/cloud-configurations/azure/vnet-inject#--create-the-azure-databricks-workspace-in-the-azure-portal) or [ARM template](https://azure.microsoft.com/en-us/resources/templates/101-databricks-all-in-one-template-for-vnet-injection/). 67 | 1. Create a [private storage account](https://docs.microsoft.com/en-us/azure/private-link/create-private-endpoint-storage-portal#create-your-private-endpoint) with a private endpoint and deploy it into a different VNet (i.e., create a new VNet named `spokevnet-storage-pl` beforehand). 68 | 1. Ensure the [private endpoint is integrated with a private DNS zone](https://docs.microsoft.com/en-us/azure/private-link/private-endpoint-dns) to host the privatelink DNS zone of the respective service, in this case `dfs.core.windows.net`. When creating the Private Endpoint, there is an option to integrate it with Private DNS as shown below: 69 | 70 | ![Private Endpoint](media/privateendpoint.png) 71 | 72 | 1. When the ADB and Storage private endpoints are deployed in their respective VNets, there are some additional steps that need to be performed: 73 | 74 | a. The VNets should be [linked](https://docs.microsoft.com/en-us/azure/dns/private-dns-virtual-network-links) with the private DNS zone, as shown below (`databricks-vnetpl` and `spkevnet-storage-pl`): 75 | 76 | ![Vnet Linked](media/vnetlinked3.png) 77 | 78 | b.
Also make sure both the ADB and storage endpoint VNETs are [peered](https://docs.microsoft.com/en-us/azure/virtual-network/virtual-network-peering-overview): 79 | 80 | ![VNet Peering](media/VNetPeering.png) 81 | 82 | The network configuration should now be as follows: 83 | 84 | ![Network Flow](media/DNSFlow.png) 85 | 86 | c. Make sure the storage firewall is enabled. As an optional step you can also add the ADB VNet (`databricks-vnet`) to communicate with this storage account. When you enable this, storage service endpoints will also be enabled on the ADB VNet (`databricks-vnet`). 87 | 88 | ![Storage Firewall](media/firewall.png) 89 | 90 | 1. In an ADB notebook you can double-check that the FQDN of the storage account now resolves to a private IP: 91 | 92 | ![ADB Notebook](media/ADBNotebook.png) 93 | 94 | 1. A mount can be created as normal using the same FQDN and it will connect privately to ADLS using the private endpoints. 95 | 96 | ![ADB Mount](media/ADBMount.png) 97 | 98 | If you are using a proxy then service principal authentication may fail. To avoid the error you can use the following environment variables and specify your proxy URL: 99 | 100 | ``` 101 | http_url: Proxy FQDN, https_url: Proxy FQDN 102 | ``` 103 | 104 | Note: You can deploy the private endpoint for storage within the same VNet where ADB is injected, but it must be in a different subnet, i.e., it must not be deployed in the ADB private or public subnets. 105 | 106 | There are [further steps](https://databricks.com/blog/2020/03/27/data-exfiltration-protection-with-azure-databricks.html) one can take to harden the Databricks control plane using an Azure Firewall if required. 107 | 108 | In the next few sections we will discuss the various approaches to authentication and the patterns used to implement access control based on permissions. 109 | 110 | ## Pattern 1 - Access via Service Principal 111 | 112 | To provide a group of users access to a particular folder (and its 113 | contents) in ADLS, the simplest mechanism is to create a [mount point 114 | using a service 115 | principal](https://docs.microsoft.com/en-gb/azure/databricks/data/data-sources/azure/azure-datalake-gen2?toc=https%3A%2F%2Fdocs.microsoft.com%2Fen-gb%2Fazure%2Fazure-databricks%2FTOC.json&bc=https%3A%2F%2Fdocs.microsoft.com%2Fen-gb%2Fazure%2Fbread%2Ftoc.json#--mount-an-azure-data-lake-storage-gen2-account-using-a-service-principal-and-oauth-20) 116 | at the desired folder depth. The mount point (`/mnt/`) is 117 | created once-off per workspace but **is accessible to any user on any 118 | cluster in that workspace**. In order to secure access for different 119 | groups of users with different permissions, one will need more than 120 | a single mount point in one workspace. One of the patterns described 121 | below should be followed. 122 | 123 | *Note that access keys cannot be used to mount ADLS in the way they can for 124 | normal blob containers without HNS enabled. Access keys [can, however, be used for direct access to data via APIs](https://docs.microsoft.com/en-us/azure/databricks/data/data-sources/azure/azure-datalake-gen2#--access-directly-using-the-storage-account-access-key).
Since DBR 7.5, direct access to data is also [possible via shared access signatures](https://docs.microsoft.com/en-us/azure/databricks/data/data-sources/azure/azure-datalake-gen2#access-directly-using-sas-token-provider) as an experimental feature.* 125 | 126 | Below is sample code to authenticate via a service principal using OAuth 2.0 and create a 127 | mount point in Python: 128 | 129 | ```python 130 | configs = { 131 | "fs.azure.account.auth.type": "OAuth", 132 | "fs.azure.account.oauth.provider.type": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider", 133 | "fs.azure.account.oauth2.client.id": "enter-your-service-principal-application-id-here", 134 | "fs.azure.account.oauth2.client.secret": dbutils.secrets.get(scope = "enter-your-key-vault-secret-scope-name-here", key = "enter-the-secret"), 135 | "fs.azure.account.oauth2.client.endpoint": "https://login.microsoftonline.com/enter-your-tenant-id-here/oauth2/token" 136 | } 137 | 138 | dbutils.fs.mount( 139 | source = "abfss://file-system-name@storage-account-name.dfs.core.windows.net/folder-path-here", 140 | mount_point = "/mnt/mount-name", 141 | extra_configs = configs) 142 | ``` 143 | The creation of the mount point and the listing of current mount points in 144 | the workspace can be done via the 145 | [CLI](https://docs.microsoft.com/en-gb/azure/databricks/dev-tools/cli/dbfs-cli): 146 | 147 | ```cli 148 | databricks configure --token 149 | 150 | Databricks Host (should begin with https://): 151 | https://eastus.azuredatabricks.net/?o=######### 152 | Token:dapi############### 153 | databricks fs ls dbfs:/mnt 154 | 155 | datalake 156 | ``` 157 | 158 | From an architecture perspective these are the basic components, where 159 | "dl" is used to represent the mount name. 160 | ![Access via Service Principal](media/AccessViaSP.png) 161 | 162 | *Note the use of default ACLs, otherwise any new folders created will be 163 | inaccessible.* 164 | 165 | The mount point and ACLs could be at the filesystem (root) level or at 166 | the folder level to grant access at the required filesystem depth. 167 | 168 | Instead of mount points, access can also be via direct path --- Azure 169 | Blob Filesystem (ABFS - included in runtime 5.2 and above) as shown in 170 | the code snippet below. 171 | 172 | To access data directly using a service principal, the authorisation code must 173 | be executed in the same session prior to reading/writing the data, for 174 | example: 175 | 176 | ```python 177 | # authenticate using a service principal and OAuth 2.0 178 | spark.conf.set("fs.azure.account.auth.type", "OAuth") 179 | spark.conf.set("fs.azure.account.oauth.provider.type", 180 | "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider") 181 | spark.conf.set("fs.azure.account.oauth2.client.id", 182 | "enter-your-service-principal-application-id-here") 183 | spark.conf.set("fs.azure.account.oauth2.client.secret", 184 | dbutils.secrets.get(scope = "secret-scope-name", key = "secret-name")) 185 | spark.conf.set("fs.azure.account.oauth2.client.endpoint", 186 | "https://login.microsoftonline.com/enter-your-tenant-id-here/oauth2/token") 187 | 188 | # read data in delta format 189 | readdf = spark.read.format("delta").load("abfss://file-system-name@storage-account-name.dfs.core.windows.net/path-to-data") 190 | ``` 191 | 192 | Using a single service principal to authenticate users to a single 193 | location in the lake is unlikely to satisfy most security requirements 194 | -- it is too coarse grained, much like RBAC on Blob containers.
It does 195 | not facilitate securing access to multiple groups of users of the lake 196 | who require different sets of permissions. One or more of the following 197 | patterns may be followed to achieve the required level of 198 | granularity. 199 | 200 | 201 | ## Pattern 2 - Multiple workspaces --- permission by workspace 202 | 203 | This is an extension of the first pattern whereby multiple workspaces 204 | are provisioned, and different groups of users are assigned to different 205 | workspaces. Each group/workspace will use a different service principal 206 | to govern the level of access required, either via a configured mount 207 | point or direct path. Conceptually, this is a mapping of a service 208 | principal to each group of users, and each service principal will have a 209 | defined set of permissions on the lake. In order to assign users to a 210 | workspace, simply ensure they are registered in your Azure Active 211 | Directory (AAD); an admin (those with a [contributor or owner role on 212 | the 213 | workspace](https://docs.microsoft.com/en-us/azure/databricks/administration-guide/account-settings/account#assign-initial-account-admins)) 214 | will need to [add 215 | users](https://docs.microsoft.com/en-us/azure/databricks/administration-guide/users-groups/users#--add-a-user) 216 | (with the same identity as in AAD) to the appropriate workspace. The 217 | architecture below depicts two different folders and two groups of users 218 | (readers and writers) on each. 219 | 220 | ![Permission by Workspace](media/PermissionByWorkspace.png) 221 | 222 | This pattern may offer excellent isolation at a workspace level; however, 223 | the main disadvantage to this approach is the proliferation of 224 | workspaces --- n groups = n workspaces. The workspace itself does not 225 | incur cost, but there may be an inherent increase in total cost of 226 | ownership. If more granular security is required than workspace-level isolation provides, then 227 | one of the following patterns may be more suitable. 228 | 229 | ## Pattern 3 - AAD Credential passthrough 230 | AAD passthrough allows different groups of users to all work in the same 231 | workspace and access data either via a mount point or direct path, 232 | authenticated using their own credentials. The user's credentials are 233 | passed through to ADLS Gen2 and evaluated against the file and folder 234 | ACLs. This feature is enabled at the cluster level under the advanced 235 | options. 236 | 237 | To [mount an ADLS filesystem or folder with AAD passthrough 238 | enabled](https://docs.microsoft.com/en-us/azure/databricks/data/data-sources/azure/adls-passthrough#azure-data-lake-storage-gen2-1) 239 | the following Scala may be used: 240 | 241 | ```scala 242 | val configs = Map( 243 | "fs.azure.account.auth.type" -> "CustomAccessToken", 244 | "fs.azure.account.custom.token.provider.class" -> spark.conf.get("spark.databricks.passthrough.adls.gen2.tokenProviderClassName")) 245 | 246 | // Optionally, you can add a folder path to the source URI of your mount point. 247 | dbutils.fs.mount( 248 | source = "abfss://file-system-name@storage-account-name.dfs.core.windows.net/folder-path-here", 249 | mountPoint = "/mnt/mount-name", 250 | extraConfigs = configs) 251 | ``` 252 | 253 | Any user reading or writing via the mount point will have their 254 | credentials evaluated.
Alternatively, to access data directly without a 255 | mount point, simply use the abfss path on a cluster with AAD passthrough 256 | enabled, for example: 257 | ```python 258 | # read data in delta format using a direct path 259 | readdf = spark.read\ 260 | .format("delta")\ 261 | .load("abfss://file-system-name@storage-account-name.dfs.core.windows.net/path-to-data") 262 | ``` 263 | Originally this functionality was only available using [high 264 | concurrency 265 | clusters](https://docs.microsoft.com/en-gb/azure/databricks/data/data-sources/azure/adls-passthrough#enable-azure-data-lake-storage-credential-passthrough-for-a-high-concurrency-cluster) 266 | and supported only Python and SQL notebooks, but recently [standard 267 | cluster support for AAD 268 | passthrough](https://docs.microsoft.com/en-gb/azure/databricks/data/data-sources/azure/adls-passthrough#--enable-azure-data-lake-storage-credential-passthrough-for-a-standard-cluster) 269 | using R and Scala notebooks was announced. One major consideration for standard clusters, 270 | however, is that only [a single user can be 271 | enabled per 272 | cluster](https://docs.microsoft.com/en-us/azure/databricks/data/data-sources/azure/adls-passthrough#single-user). 273 | 274 | ![Single User Access](media/SingleUserAccess.png) 275 | 276 | A subtle but important difference in this pattern is that service 277 | principals are not required to delegate access, as it is the user's 278 | credentials that are used. 279 | 280 | ![AAD Credential Passthrough](media/AADCredPassthru.png) 281 | 282 | *Note: Access can still be either via direct path or mount point.* 283 | 284 | There are some [further 285 | considerations](https://docs.microsoft.com/en-gb/azure/databricks/data/data-sources/azure/adls-passthrough#known-limitations) 286 | to note at the time of writing: 287 | 288 | - The [minimum runtime versions](https://docs.microsoft.com/en-gb/azure/databricks/data/data-sources/azure/adls-passthrough#supported-features) 289 | required, which PySpark ML APIs are not supported, and the 290 | associated supported features 291 | 292 | - **Databricks Connect is not supported** 293 | 294 | - **[Jobs](https://docs.microsoft.com/en-gb/azure/databricks/jobs#jobs)** 295 | are not supported 296 | 297 | - jdbc/odbc (BI tools) is not yet supported 298 | 299 | If any of these limitations present a challenge, or there is a 300 | requirement to enable more than one Scala or R developer to work on a 301 | cluster at the same time, then you may need to consider one of the other 302 | patterns below. 303 | 304 | Please follow the [tutorial for Pattern 3](tutorials/pattern_3_AAD_Credential_passthrough/Pattern3.md). 305 | 306 | ## Pattern 4 - Cluster scoped Service Principal 307 | 308 | In this pattern, each cluster is "mapped" to a unique service principal. 309 | [Restricting users or groups to a particular 310 | cluster](https://docs.microsoft.com/en-us/azure/databricks/administration-guide/access-control/cluster-acl#--configure-cluster-level-permissions), 311 | using the "can attach to" permission, ensures that access to the 312 | data lake is restricted by the ACLs assigned to the service principal.
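One way to apply the "can attach to" restriction programmatically is through the Databricks Permissions REST API. The sketch below is illustrative only: the workspace URL, cluster ID, group name and token source are hypothetical placeholders, and an equivalent change can be made in the cluster UI or via the CLI.

```python
# Sketch: grant a Databricks group "can attach to" on a specific cluster
# using the Permissions API. All identifiers below are hypothetical.
import requests

workspace_url = "https://<databricks-instance>.azuredatabricks.net"
token = "<personal-access-token>"  # retrieve from a secret store in practice

response = requests.patch(
    f"{workspace_url}/api/2.0/permissions/clusters/<cluster-id>",
    headers={"Authorization": f"Bearer {token}"},
    json={
        "access_control_list": [
            {"group_name": "engineering-readers", "permission_level": "CAN_ATTACH_TO"}
        ]
    },
)
response.raise_for_status()
```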
313 | 314 | ![Cluster Scoped Service Principal](media/ClusterScopedServicePrincipal.png) 315 | 316 | This pattern will allow you to use multiple clusters in the same 317 | workspace, and "attach" a set of permissions according to the service 318 | principal set in the [cluster 319 | config](https://docs.microsoft.com/en-gb/azure/databricks/clusters/configure?toc=https%3A%2F%2Fdocs.microsoft.com%2Fen-gb%2Fazure%2Fazure-databricks%2FTOC.json&bc=https%3A%2F%2Fdocs.microsoft.com%2Fen-gb%2Fazure%2Fbread%2Ftoc.json): 320 | 321 | ``` 322 | fs.azure.account.auth.type OAuth 323 | fs.azure.account.oauth.provider.type org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider 324 | fs.azure.account.oauth2.client.id <service-principal-application-id> 325 | fs.azure.account.oauth2.client.secret {{secrets/<secret-scope-name>/<secret-name>}} 326 | fs.azure.account.oauth2.client.endpoint https://login.microsoftonline.com/<tenant-id>/oauth2/token 327 | ``` 328 | 329 | *Note the method in which secrets are referenced in the config section, 330 | as it is different from the usual dbutils syntax.* 331 | 332 | The benefit of this approach is that the scope and secret names are not 333 | exposed to end users, and they do not require read access to the secret 334 | scope; the creator of the cluster, however, will. Please note that the secret will effectively be available to all users of the cluster. 335 | 336 | Users should use the direct access method, via ABFS, and mount points 337 | should be forbidden, unless of course there is a global folder everyone 338 | in the workspace needs access to. Until there is an in-built way to 339 | prevent mount points being created, you may wish to write an alert 340 | utility which runs frequently, checks for any mount points using the 341 | CLI (as shown in the first pattern) and sends a notification if any 342 | unauthorised mount points are detected; a minimal sketch of such a check is shown at the end of this section. 343 | 344 | This pattern could be useful when both engineers and analysts require 345 | different sets of permissions and are assigned to the same workspace. The 346 | engineers may need read access to one or more source data sets and then 347 | write access to a target location, with read-write access to a staging 348 | or working location. This requires a single service principal to have 349 | access to all the data sets in order for the code to execute fully --- 350 | more about this in the next pattern. The analysts, however, may need read 351 | access to the target folder and nothing else. 352 | 353 | The disadvantage of this approach is dedicated clusters for each 354 | permission group, i.e., no sharing of clusters across permission groups. 355 | In other words, each service principal, and therefore each cluster, 356 | should have sufficient permissions in the lake to run the desired 357 | workload on that cluster. The reason for this is that a cluster can only 358 | be configured with a single service principal at a time. In a production 359 | scenario the config should be specified by scripting the 360 | provisioning of clusters using the CLI or API. 361 | 362 | Depending on the number of permission groups required, this pattern 363 | could result in a proliferation of clusters. The next pattern may 364 | overcome this challenge but will require each user to execute 365 | authentication code at run time. 366 | 367 | Please follow the [tutorial for Pattern 4](tutorials/pattern_4_Cluster_scoped_Service_Principal/Pattern4.md).
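As a rough illustration of the alert utility mentioned above, the following notebook sketch lists the current mount points with `dbutils.fs.mounts()` and flags anything outside an approved list. It is an assumption rather than part of the original guidance: the allow-list and the notification mechanism are placeholders to adapt, and it would typically run as a scheduled job owned by an administrator.

```python
# Sketch: audit mount points and flag any that are not on an approved list.
# The allow-list below is a hypothetical example.
APPROVED_MOUNTS = {"/mnt/shared-global"}

unexpected = [
    m for m in dbutils.fs.mounts()
    if m.mountPoint.startswith("/mnt/") and m.mountPoint not in APPROVED_MOUNTS
]

if unexpected:
    details = "\n".join(f"{m.mountPoint} -> {m.source}" for m in unexpected)
    # Replace with your preferred notification channel (email, webhook, etc.)
    print(f"Unauthorised mount points detected:\n{details}")
```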
368 | 369 | ## Pattern 5 - Session scoped Service Principal 370 | 371 | In this pattern, access control is governed at the session level, so a 372 | cluster may be shared by multiple groups of users, each using a set of 373 | service principal credentials. *Normally, clusters with a number of 374 | concurrent users and jobs will require a [high 375 | concurrency](https://docs.microsoft.com/en-gb/azure/databricks/clusters/configure#--high-concurrency-clusters) 376 | cluster to ensure resources are shared fairly.* The user attempting to 377 | access ADLS will need to use the direct access method and execute the 378 | OAuth code prior to accessing the required folder. Consequently, this 379 | approach will not work when using odbc/jdbc connections. Also note that 380 | **only one service principal can be set in a session at a time**, and this 381 | will have a significant influence on the design because of Spark's lazy 382 | evaluation, as described later. Below is sample OAuth code, which is 383 | very similar to the code used in pattern 1 above: 384 | ```python 385 | # authenticate using a service principal and OAuth 2.0 386 | spark.conf.set("fs.azure.account.auth.type", "OAuth") 387 | spark.conf.set("fs.azure.account.oauth.provider.type", "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider") 388 | spark.conf.set("fs.azure.account.oauth2.client.id", "enter-your-service-principal-application-id-here") 389 | spark.conf.set("fs.azure.account.oauth2.client.secret", dbutils.secrets.get(scope = "secret-scope-name", key = "secret-name")) 390 | spark.conf.set("fs.azure.account.oauth2.client.endpoint", "https://login.microsoftonline.com/enter-your-tenant-id-here/oauth2/token") 391 | 392 | # read data in delta format 393 | readdf = spark.read.format("delta").load("abfss://file-system-name@storage-account-name.dfs.core.windows.net/path-to-data") 394 | ``` 395 | This pattern works well where different permission groups (such as 396 | analysts and engineers) are required but one does not wish to take on 397 | the administrative burden of isolating the user groups by cluster. *As 398 | in the previous approach, mounting folders using the provided service 399 | principal/secret scope details should be forbidden.* 400 | 401 | The mechanism which ensures that each group has the appropriate level of 402 | access is their ability to "use" a service principal which has 403 | been added to the AAD group with the desired level of access. The way to 404 | effectively "map" the user group's level of access to a particular 405 | service principal is by granting the Databricks user group access to the 406 | secret scope (see below) which stores the credentials for that service 407 | principal. Armed with the secret scope name and the associated key 408 | name(s), users can then run the authorisation code shown above. The 409 | "client.secret" (service principal's secret) is stored as a secret in 410 | the secret scope, but so too can any other sensitive details such as the 411 | service principal's application ID and tenant ID. 412 | 413 | The disadvantage to this approach is the **proliferation of secret 414 | scopes, of which there is a limit of 100 per workspace**. Additionally 415 | the [premium 416 | plan](https://databricks.com/product/azure-pricing) is 417 | required in order to assign granular permissions to the secret scope.
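Since the application ID, tenant ID and client secret can all live in the secret scope, the per-session boilerplate can be wrapped in a small helper. The following is only a sketch under assumed conventions: the secret key names (`sp-client-id`, `sp-client-secret`, `tenant-id`) are hypothetical, and the function is expected to run in a notebook where `spark` and `dbutils` are available.

```python
# Sketch: configure the current Spark session from a secret scope the
# user's Databricks group has READ access to. Key names are hypothetical.
def use_service_principal(secret_scope: str) -> None:
    client_id = dbutils.secrets.get(scope=secret_scope, key="sp-client-id")
    client_secret = dbutils.secrets.get(scope=secret_scope, key="sp-client-secret")
    tenant_id = dbutils.secrets.get(scope=secret_scope, key="tenant-id")

    spark.conf.set("fs.azure.account.auth.type", "OAuth")
    spark.conf.set("fs.azure.account.oauth.provider.type",
                   "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider")
    spark.conf.set("fs.azure.account.oauth2.client.id", client_id)
    spark.conf.set("fs.azure.account.oauth2.client.secret", client_secret)
    spark.conf.set("fs.azure.account.oauth2.client.endpoint",
                   f"https://login.microsoftonline.com/{tenant_id}/oauth2/token")

# For example, a member of the GrWritersA group, which can read the
# SsWritersA scope described below, would simply call:
use_service_principal("SsWritersA")
```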
418 | 419 | To help explain this pattern further, and the setup required, examine 420 | the following simple scenario: 421 | 422 | ![Session Scoped Service Principal](media/SessionScopedServicePrincipal.png) 423 | 424 | The above diagram depicts a single folder (A) with two sets of 425 | permissions, readers and writers. AAD groups reflect these roles and 426 | have been assigned appropriate folder ACLs. Each AAD group contains a 427 | service principal, and the credentials for each service principal have 428 | been stored in a unique secret scope. Each 429 | [group](https://docs.microsoft.com/en-us/azure/databricks/administration-guide/users-groups/groups) 430 | in the Databricks workspace contains the appropriate users, and the 431 | group has been [assigned READ 432 | ACLs](https://docs.microsoft.com/en-us/azure/databricks/dev-tools/cli/secrets-cli) 433 | on the associated secret scope, which allows a group of users to "use" 434 | the service principal mapped to their level of permission. 435 | 436 | Below is an example CLI command of how to grant read permissions to the 437 | "GrWritersA" Databricks group on the "SsWritersA" secret scope. Note that 438 | ACLs are at secret scope level, not at secret level, which means that one 439 | secret scope will be required per service principal. 440 | ```CLI 441 | databricks secrets put-acl --scope SsWritersA --principal GrWritersA --permission READ 442 | databricks secrets get-acl --scope SsWritersA --principal GrWritersA 443 | Principal Permission 444 | — — — — — — — — — — — — 445 | GrWritersA READ 446 | ``` 447 | How this may be implemented for your data lake scenario requires 448 | careful thought and planning. In very general terms, this pattern is 449 | applied in one of two ways: at folder granularity, representing a 450 | department or data lake zone (1), or at data project or "data module" 451 | granularity (2): 452 | 453 | 1. Analysts (read-only) and engineers (read-write) are working within a 454 | single folder structure, and they do not require access to 455 | additional datasets outside of their current directory. The diagram 456 | below depicts two folders A and B, perhaps representing two 457 | departments. Each department has its own analysts and engineers 458 | working on their data, and should not be allowed access to the other 459 | department's data. 460 | 461 | ![Access To Single Dataset](media/AccessToSingleDataset.png) 462 | 463 | 2. Using the diagram below for reference, engineers and analysts are 464 | working on different projects and should have clear separation of 465 | concerns. Engineers working on "Module 1" require read access to 466 | multiple source data assets (A & B). Transformations and joins are 467 | run to produce another data asset ( C ). Engineers also require a 468 | working or staging directory to persist output during various 469 | stages (X). For the entire pipeline to execute, "Service Principal 470 | for Module 1 Developers" has been added to the relevant AAD groups 471 | which provide access to all necessary folders (A, B, X, C) through 472 | the assigned ACLs. 473 | 474 | Analysts need to produce analytics using the new data asset ( C ) 475 | but should not have access to the source data; therefore, they use 476 | the "Service Principal for Dataset C" which was added to the Readers 477 | C group only.
478 | 479 | ![Access To Multiple Dataset](media/AccessToMultipleDataset.png) 480 | 481 | It may seem more logical to have one service principal per data asset, 482 | but when multiple permissions are required for a single pipeline to 483 | execute in Spark, then one needs to consider how [lazy 484 | evaluation](https://data-flair.training/blogs/apache-spark-lazy-evaluation/) 485 | works. When attempting to use multiple service principals in the same 486 | notebook/session, one needs to remember that the reads are only 487 | executed once the write is triggered. One cannot therefore set the 488 | authentication to one service principal for one folder and then to 489 | another prior to the final write operation, all in the same 490 | notebook/session, as the read operation will be executed only when the 491 | write is triggered. 492 | 493 | *This means a single service principal will need to encapsulate the 494 | permissions of a single pipeline execution rather than a single service 495 | principal per data asset.* 496 | 497 | ## Pattern 6 - Databricks Table Access Control 498 | 499 | 500 | One final pattern, which is not technically an access pattern to ADLS, 501 | implements security at the table (or view) level rather than at the data 502 | lake level. This method is native to Databricks and involves granting, 503 | denying, and revoking access to tables or views which may have been created 504 | from files residing in ADLS. Access is granted programmatically (from 505 | Python or SQL) to tables or views based on user/group. This approach 506 | requires [both cluster and table access 507 | control](https://docs.microsoft.com/en-gb/azure/databricks/administration-guide/access-control/table-acls/table-acl#enable-table-access-control-at-the-account-level) 508 | to be enabled and requires a [premium tier 509 | workspace](https://databricks.com/product/azure-pricing). 510 | File access is disabled through a [cluster level 511 | configuration](https://docs.microsoft.com/en-gb/azure/databricks/administration-guide/access-control/table-acls/table-acl#sql-only-table-access-control) 512 | which ensures the only method of data access for users is via the 513 | pre-configured tables or views. This works well for analytical (BI) 514 | tools accessing tables/views via odbc, but limits users in their ability 515 | to access files directly, and does not support R and Scala. 516 | 517 | Please follow the [tutorial for Pattern 6](tutorials/pattern_6_Databricks_Table_Access_Control/Pattern6.md). 518 | 519 | ## Conclusion 520 | 521 | This white paper has examined a number of access patterns to Azure Data 522 | Lake Gen2 available from Azure Databricks. There are merits and 523 | disadvantages to each, and most likely it will be a combination of these 524 | patterns which will suit a production scenario. Below is a table 525 | summarising the above access patterns and some important considerations 526 | for each. 527 | 528 | ![Comparison of options](media/Comparison.png) 529 | 530 | ## License/Terms of Use 531 | 532 | This is a free white paper released into the public domain. 533 | 534 | Anyone is free to use or distribute this white paper, for any purpose, 535 | commercial or non-commercial, and by any means. 536 | 537 | THE WHITE PAPER IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, 538 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 539 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
540 | 541 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 542 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 543 | FROM, OUT OF OR IN CONNECTION WITH THE WHITE PAPER. 544 | 545 | -------------------------------------------------------------------------------- /tutorials/pattern_3_AAD_Credential_passthrough/Pattern3.md: -------------------------------------------------------------------------------- 1 | # Tutorial for Pattern 3: Azure Active Directory Credential passthrough 2 | 3 | **Summary:** 4 | 5 | This document provides links to a tutorial on implementing Pattern 3: Azure Active Directory Credential passthrough. 6 | 7 | **Versions:** 8 | 9 | | **Name** | **Title** | **Notes** | **Date** | 10 | | --- | --- | --- | --- | 11 | | Anil Sener | Microsoft Cloud Solution Architect – Data & AI | Original | 01 December 2021 | 12 | | | | | | 13 | 14 | # Contents 15 | 16 | [Pre-requisites](#Pre-requisites) 17 | 18 | [Tutorial Steps](#Tutorial-Steps) 19 | 20 | [License/Terms of Use](#License/Terms-of-Use) 21 | 22 | ## Pre-requisites 23 | 24 | This tutorial requires the completion of the steps in the [Connecting securely to ADLS from ADB](../../Readme.md#connecting-securely-to-adls-from-adb) section. 25 | 26 | This tutorial requires the completion of the steps in the [preparation](../preparation/Readme.md) section. 27 | 28 | This tutorial requires a premium Databricks Workspace. 29 | 30 | Install the [Databricks Client](https://docs.databricks.com/dev-tools/cli/index.html). 31 | 32 | ## Tutorial Steps 33 | 1. Navigate to Storage Accounts, drill down to the storage account created in the setup steps and navigate to Containers to display test_container. Then, click the ... icon on the right-hand side of the container and select Manage ACLs: 34 |

35 | 36 |

37 | 38 | 2. When the ACLs for the container are displayed, add the principals for the active directory groups called group1 and group2 which should be already created as a part of preparation steps. Please check only Execute right for these principals and click to Save button: 39 |

40 | 41 |

42 | 43 | 3. Click on test_container and then you should be able to see the folders inside the container. Then, click ... icon on the rightern side of the iot_devices folder and select Manage ACLs: 44 |

45 | 46 |

47 | 48 | 4. When the ACLs for the iot_devices folder are displayed, add the principals for the active directory group group1 and check Read and Execute and click to Save button: 49 |

50 | 51 |

52 | 53 | 5. When the ACLs for the loans folder are displayed, add the principals for the active directory group group2 and check Read and Execute and click to Save button: 54 |

55 | 56 |

57 | 58 | 6. Navigate to the premium Azure Databricks Workspace > Overview and click Launch Workspace button, choose and an admin user to login. When Azure Databricks Workspace is displayed, navigate to Compute. Then, create a Standard cluster for TestUser1 enabling the credential passthrough with the following settings. Set the permissions to allow IoTDevicesGroup to attach this cluster: 59 |

60 | 61 |

62 | 63 |

64 | 65 |

66 | 67 | 7. Then, create another Standard cluster for TestUser2 enabling the credential passthrough with the following settings.Set the permissions to allow LoanGroup to attach this cluster: 68 |

69 | 70 |

71 | 72 |

73 | 74 |

75 | 76 | 8. Navigate to the premium Azure Databricks Workspace > Overview on Azure Portal and click Launch Workspace button, choose and TestUser1 user to login. When Azure Databricks Workspace is displayed, navigate to Workspace, then upload [pattern3-AADPassthrough-group1.ipynb](notebooks/testuser1/pattern3-AADPassthrough-group1.ipynb) notebook to the Databricks Workspace and open the notebook, attach & start the cluster created in step 6 and then run all cells: 77 |

78 | 79 |

80 | 81 | RESULT: Files unders /iot_devices folder are readable meanwhile /loans raise an exception due to lack of privileges to access these files on ADLS Gen2. These permissions are limited thanks to the privileges of group1 on ADLS Gen2 ACLs. 82 | 83 | 9. Navigate to the premium Azure Databricks Workspace > Overviewon Azure Portal and click Launch Workspace button, choose and TestUser2 user to login. When Azure Databricks Workspace is displayed, navigate to Workspace, then upload [pattern3-AADPassthrough-group2.ipynb](notebooks/testuser2/pattern3-AADPassthrough-group2.ipynb) notebook to the Databricks Workspace and open the notebook, attach & start the cluster created in step 7 and then run all cells: 84 |

85 | 86 |

87 | 88 | RESULT: Files unders /loans folder are readable meanwhile /iot_devices raise an exception due to lack of privileges to access these files on ADLS Gen2. These permissions are limited thanks to the privileges of group1 on ADLS Gen2 ACLs. 89 | 90 | ## License/Terms of Use 91 | 92 | This is a free white paper released into the public domain. 93 | 94 | Anyone is free to use or distribute this white paper, for any purpose, 95 | commercial or non-commercial, and by any means. 96 | 97 | THE WHITE PAPER IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, 98 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 99 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 100 | 101 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 102 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 103 | FROM, OUT OF OR IN CONNECTION WITH THE WHITE PAPER. 104 | 105 | -------------------------------------------------------------------------------- /tutorials/pattern_3_AAD_Credential_passthrough/media/pattern_3_step_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/pattern_3_AAD_Credential_passthrough/media/pattern_3_step_1.png -------------------------------------------------------------------------------- /tutorials/pattern_3_AAD_Credential_passthrough/media/pattern_3_step_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/pattern_3_AAD_Credential_passthrough/media/pattern_3_step_2.png -------------------------------------------------------------------------------- /tutorials/pattern_3_AAD_Credential_passthrough/media/pattern_3_step_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/pattern_3_AAD_Credential_passthrough/media/pattern_3_step_3.png -------------------------------------------------------------------------------- /tutorials/pattern_3_AAD_Credential_passthrough/media/pattern_3_step_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/pattern_3_AAD_Credential_passthrough/media/pattern_3_step_4.png -------------------------------------------------------------------------------- /tutorials/pattern_3_AAD_Credential_passthrough/media/pattern_3_step_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/pattern_3_AAD_Credential_passthrough/media/pattern_3_step_5.png -------------------------------------------------------------------------------- /tutorials/pattern_3_AAD_Credential_passthrough/media/pattern_3_step_6.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/pattern_3_AAD_Credential_passthrough/media/pattern_3_step_6.png -------------------------------------------------------------------------------- /tutorials/pattern_3_AAD_Credential_passthrough/media/pattern_3_step_6_a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/pattern_3_AAD_Credential_passthrough/media/pattern_3_step_6_a.png -------------------------------------------------------------------------------- /tutorials/pattern_3_AAD_Credential_passthrough/media/pattern_3_step_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/pattern_3_AAD_Credential_passthrough/media/pattern_3_step_7.png -------------------------------------------------------------------------------- /tutorials/pattern_3_AAD_Credential_passthrough/media/pattern_3_step_7_a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/pattern_3_AAD_Credential_passthrough/media/pattern_3_step_7_a.png -------------------------------------------------------------------------------- /tutorials/pattern_3_AAD_Credential_passthrough/media/pattern_3_step_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/pattern_3_AAD_Credential_passthrough/media/pattern_3_step_8.png -------------------------------------------------------------------------------- /tutorials/pattern_3_AAD_Credential_passthrough/media/pattern_3_step_9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/pattern_3_AAD_Credential_passthrough/media/pattern_3_step_9.png -------------------------------------------------------------------------------- /tutorials/pattern_4_Cluster_scoped_Service_Principal/Pattern4.md: -------------------------------------------------------------------------------- 1 | # Tutorial for Pattern 4: Cluster Scoped Service Principal 2 | 3 | **Summary:** 4 | 5 | This document provides links to a tutorial on implementing Pattern 4: Cluster Scoped Service Principal 6 | 7 | **Versions:** 8 | 9 | | **Name** | **Title** | **Notes** | **Date** | 10 | | --- | --- | --- | --- | 11 | | Anil Sener | Microsoft Cloud Solution Architect – Data & AI | Original | 01 December 2021 | 12 | | | | | | 13 | 14 | # Contents 15 | 16 | [Pre-requisites](#Pre-requisites) 17 | 18 | [Tutorial Steps](#Tutorial-Steps) 19 | 20 | [License/Terms of Use](#License/Terms-of-Use) 21 | 22 | ## Pre-requisites 23 | 24 | This tutorial requires the completion of the steps in [Connecting securely to ADLS from ADB](../../Readme.md#connecting-securely-to-adls-from-adb) section. 25 | 26 | This tutorial requires the completion of the steps in the [preparation](../preparation/Readme.md) section. 
27 | 28 | This tutorial requires a premium Databricks Workspace. 29 | 30 | Install the [Databricks Client](https://docs.databricks.com/dev-tools/cli/index.html). 31 | 32 | ## Tutorial Steps 33 | 1. Navigate to Azure Active Directory > App registrations and create two app registration service principals, one for loans and another for iot_devices (a CLI sketch follows the screenshots below): 34 |

35 | 36 |

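The portal steps above can also be scripted. A minimal Azure CLI sketch, assuming hypothetical display names and placeholder application IDs (adjust both to your environment):

```sh
# Create the two app registrations (display names are illustrative)
az ad app create --display-name "rr-demo-sp-iot-devices"
az ad app create --display-name "rr-demo-sp-loans"

# Create a service principal for each app registration,
# using the appId values returned by the commands above
az ad sp create --id <iot-devices-app-id>
az ad sp create --id <loans-app-id>
```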
37 | 38 | 2. Drill down to the app registration created for iot_devices, navigate to Certificates & secrets > Client secrets, create a secret, and copy the secret value to be used in Key Vault in the later steps: 39 |

40 | 41 |

42 | 43 | 3. Drill down to the app registration created for loans, navigate to Certificates & secrets > Client secrets, create a secret, and copy the secret value to be used in Key Vault in the later steps (a CLI sketch follows the screenshots below): 44 |

45 | 46 |

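Steps 2 and 3 can likewise be scripted. A sketch, assuming the placeholder application IDs from step 1; note that `az ad app credential reset` replaces any existing secrets on the app registration and prints the new secret value:

```sh
# Generate a client secret for each app registration and print only its value
az ad app credential reset --id <iot-devices-app-id> --query password -o tsv
az ad app credential reset --id <loans-app-id> --query password -o tsv
```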
47 | 48 | 4. Navigate to Key Vaults and search for the Key Vault called rr-demo-kv, which was created during the setup process, then navigate to Secrets and create two secrets for iot_devices and loans using the client secret values generated in steps 2 and 3 (a CLI sketch follows the screenshots below): 49 |

50 | 51 |

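Equivalently, the two Key Vault secrets can be created from the command line. A sketch, assuming hypothetical secret names; whatever names you choose here are the ones referenced later from the Databricks secret scopes:

```sh
# Store each client secret in the rr-demo-kv Key Vault (secret names are illustrative)
az keyvault secret set --vault-name rr-demo-kv --name iot-devices-client-secret --value "<secret-value-from-step-2>"
az keyvault secret set --vault-name rr-demo-kv --name loans-client-secret --value "<secret-value-from-step-3>"
```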
52 | 53 | 5. When the ACLs for the container are displayed, add the principals for the App registrations created in step 1. Check only the Execute right for these principals and click the Save button: 54 |

55 | 56 |

57 | 58 | 6. Click on test-container; you should now see the folders inside the container. Then click the ... icon on the right-hand side of the iot_devices folder and select Manage ACLs: 59 |

60 | 61 |

62 | 63 | 7. When the ACLs for the iot_devices folder are displayed, add the app registration service principal created for iot_devices, check Read and Execute, and click the Save button: 64 |

65 | 66 |

67 | 68 | 8. When the ACLs for the loans folder are displayed, add the app registration service principal created for loans, check Read and Execute, and click the Save button (a CLI sketch for steps 5-8 follows the screenshots below): 69 |

70 | 71 |

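The ACL assignments from steps 5-8 can also be applied with the Azure CLI instead of the portal. A minimal sketch, assuming placeholder object IDs for the two service principals and the storage account name used in the notebooks (rrdemostorageacc); `update-recursive` merges the given entries into the existing ACLs of the path and its children, which is slightly broader than the portal clicks but preserves the same read restrictions:

```sh
# Execute (--x) on the container root for both service principals
az storage fs access update-recursive -f test-container -p / \
    --acl "user:<iot-devices-sp-object-id>:--x,user:<loans-sp-object-id>:--x" \
    --account-name rrdemostorageacc --auth-mode login

# Read + Execute on each folder, but only for the matching service principal
az storage fs access update-recursive -f test-container -p iot_devices \
    --acl "user:<iot-devices-sp-object-id>:r-x" \
    --account-name rrdemostorageacc --auth-mode login

az storage fs access update-recursive -f test-container -p loans \
    --acl "user:<loans-sp-object-id>:r-x" \
    --account-name rrdemostorageacc --auth-mode login
```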
72 | 73 | 9. Navigate to the premium Azure Databricks Workspace > Overview and click the Launch Workspace button, choosing an admin user to log in. When the Azure Databricks Workspace is displayed, navigate to Settings > User Settings and then click the Generate New Token button. Copy the token. 74 | 75 | 10. Launch a command-line application on your PC and use the [Databricks Client](https://docs.databricks.com/dev-tools/cli/index.html) to execute the following command, which configures the Databricks client against your premium workspace URL using the token generated in the previous step: 76 | ```sh 77 | databricks configure --token 78 | ``` 79 | 80 | 11. After the [Databricks Client](https://docs.databricks.com/dev-tools/cli/index.html) configuration, execute the following commands to create two Databricks secret scopes (for iot_devices and loans) backed by the Key Vault and to grant each group READ permission on its scope, to be referred to in the subsequent steps of this tutorial. Please remember to replace `<subscription-id>` with your own subscription ID in each command below: 81 | 82 | ```sh 83 | databricks secrets create-scope --scope iot-group-scope --scope-backend-type AZURE_KEYVAULT --resource-id /subscriptions/<subscription-id>/resourceGroups/RR-demo/providers/Microsoft.KeyVault/vaults/rr-demo-kv --dns-name https://rr-demo-kv.vault.azure.net/ 84 | 85 | databricks secrets put-acl --scope iot-group-scope --principal IoTDevicesGroup --permission READ 86 | 87 | databricks secrets create-scope --scope loan-group-scope --scope-backend-type AZURE_KEYVAULT --resource-id /subscriptions/<subscription-id>/resourceGroups/RR-demo/providers/Microsoft.KeyVault/vaults/rr-demo-kv --dns-name https://rr-demo-kv.vault.azure.net/ 88 | 89 | databricks secrets put-acl --scope loan-group-scope --principal LoanGroup --permission READ 90 | ``` 91 | 92 | These ACLs limit each Databricks group to the secret scope, and therefore to the app registration service principal, relevant to that group (a quick verification sketch follows the step 12 screenshots below). Ideally, we should have created separate Azure Key Vaults for each secret scope. 93 | 94 | 12. Navigate to the premium Azure Databricks Workspace > Overview and click the Launch Workspace button, choosing an admin user to log in. When the Azure Databricks Workspace is displayed, navigate to Compute and then create a cluster to be used by the IoTDevices group by setting the Spark Configuration as below. Set the permissions to allow IoTDevicesGroup to attach to this cluster. Please remember to replace `<application-id>`, `<secret-name>` and `<tenant-id>` (your Azure Tenant ID): 95 | 96 | ``` 97 | fs.azure.account.auth.type OAuth 98 | fs.azure.account.oauth.provider.type org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider 99 | fs.azure.account.oauth2.client.id <application-id> 100 | fs.azure.account.oauth2.client.secret {{secrets/iot-group-scope/<secret-name>}} 101 | fs.azure.account.oauth2.client.endpoint https://login.microsoftonline.com/<tenant-id>/oauth2/token 102 | ``` 103 | 104 |

105 | 106 |

107 | 108 |

109 | 110 |

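To confirm that the secret scopes and ACLs from step 11 were created as intended, the Databricks CLI can list them. A quick verification sketch (output formats vary by CLI version):

```sh
# List all secret scopes, then the ACLs attached to each group-specific scope
databricks secrets list-scopes
databricks secrets list-acls --scope iot-group-scope
databricks secrets list-acls --scope loan-group-scope
```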
111 | 112 | 13. Create another cluster to be used by the LoanGroup group by setting the Spark Configuration as below. Set the permissions to allow LoanGroup to attach to this cluster. Please remember to replace `<application-id>`, `<secret-name>` and `<tenant-id>` (your Azure Tenant ID); a CLI sketch for scripting the cluster creation follows the screenshots below: 113 | 114 | ``` 115 | fs.azure.account.auth.type OAuth 116 | fs.azure.account.oauth.provider.type org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider 117 | fs.azure.account.oauth2.client.id <application-id> 118 | fs.azure.account.oauth2.client.secret {{secrets/loan-group-scope/<secret-name>}} 119 | fs.azure.account.oauth2.client.endpoint https://login.microsoftonline.com/<tenant-id>/oauth2/token 120 | ``` 121 | 122 |

123 | 124 |

125 | 126 |

127 | 128 |

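The clusters from steps 12 and 13 can also be created with the Databricks CLI rather than through the UI. A minimal sketch, assuming a hypothetical cluster-loans.json that carries the node type, Spark version and the same fs.azure.account.* OAuth settings shown above in its spark_conf section:

```sh
# cluster-loans.json is illustrative; its spark_conf block should contain the
# OAuth settings from step 13 (client id, secret reference and token endpoint).
databricks clusters create --json-file cluster-loans.json

# Cluster permissions (which group may attach) still need to be granted, e.g. in the UI as above.
```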
129 | 130 | 14. Navigate to the premium Azure Databricks Workspace > Overview on the Azure Portal and click the Launch Workspace button, choosing the TestUser1 user to log in. When the Azure Databricks Workspace is displayed, navigate to Workspace, upload the [pattern4-Cluster-scoped-principals-iot_devices.ipynb](notebooks/testuser1/pattern4-Cluster-scoped-principals-iot_devices.ipynb) notebook to the Databricks Workspace, open the notebook, attach and start the cluster created in step 12, and then run all cells: 131 |

132 | 133 |

134 | 135 | RESULT: Files under the /iot_devices folder are readable, while /loans raises an exception due to the lack of privileges to access those files on ADLS Gen2. Access is limited by the ADLS Gen2 ACLs granted to the app registration service principal created for iot_devices. 136 | 137 | 15. Navigate to the premium Azure Databricks Workspace > Overview on the Azure Portal and click the Launch Workspace button, choosing the TestUser2 user to log in. When the Azure Databricks Workspace is displayed, navigate to Workspace, upload the [pattern4-Cluster-scoped-principals-loans.ipynb](notebooks/testuser2/pattern4-Cluster-scoped-principals-loans.ipynb) notebook to the Databricks Workspace, open the notebook, attach and start the cluster created in step 13, and then run all cells: 138 |

139 | 140 |

141 | 142 | RESULT: Files under the /loans folder are readable, while /iot_devices raises an exception due to the lack of privileges to access those files on ADLS Gen2. Access is limited by the ADLS Gen2 ACLs granted to the app registration service principal created for loans. 143 | 144 | ## License/Terms of Use 145 | 146 | This is a free white paper released into the public domain. 147 | 148 | Anyone is free to use or distribute this white paper, for any purpose, 149 | commercial or non-commercial, and by any means. 150 | 151 | THE WHITE PAPER IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 152 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 153 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 154 | 155 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 156 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 157 | FROM, OUT OF OR IN CONNECTION WITH THE WHITE PAPER. 158 | 159 |
https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/pattern_4_Cluster_scoped_Service_Principal/media/pattern_4_step_4_a.png -------------------------------------------------------------------------------- /tutorials/pattern_4_Cluster_scoped_Service_Principal/media/pattern_4_step_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/pattern_4_Cluster_scoped_Service_Principal/media/pattern_4_step_5.png -------------------------------------------------------------------------------- /tutorials/pattern_4_Cluster_scoped_Service_Principal/media/pattern_4_step_5_a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/pattern_4_Cluster_scoped_Service_Principal/media/pattern_4_step_5_a.png -------------------------------------------------------------------------------- /tutorials/pattern_4_Cluster_scoped_Service_Principal/media/pattern_4_step_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/pattern_4_Cluster_scoped_Service_Principal/media/pattern_4_step_6.png -------------------------------------------------------------------------------- /tutorials/pattern_4_Cluster_scoped_Service_Principal/media/pattern_4_step_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/pattern_4_Cluster_scoped_Service_Principal/media/pattern_4_step_7.png -------------------------------------------------------------------------------- /tutorials/pattern_4_Cluster_scoped_Service_Principal/notebooks/testuser2/pattern4-Cluster-scoped-principals-loans.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"cell_type":"markdown","source":["# pattern 4- cluster scoped principals - loans\n\nhttps://docs.microsoft.com/en-us/azure/databricks/administration-guide/access-control/cluster-acl#--configure-cluster-level-permissions"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"af7f1460-22a4-4dcc-ab0c-e912712b1e74"}}},{"cell_type":"code","source":["# read data in delta format using direct path\nloansdf = spark.read\\\n .format(\"delta\")\\\n 
.load(\"abfss://test-container@rrdemostorageacc.dfs.core.windows.net/loans\")\n\ndisplay(loansdf)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"7a20b015-eb2c-4cbe-8fb0-c201e7484c30"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"overflow":true,"datasetInfos":[],"data":[[0,1000,182.22,"CA"],[1,1000,361.19,"WA"],[2,1000,176.26,"TX"],[3,1000,1000.0,"OK"],[4,1000,249.98,"PA"],[5,1000,408.6,"CA"],[6,1000,1000.0,"MD"],[7,1000,168.81,"OH"],[8,1000,193.64,"TX"],[9,1000,218.83,"CT"],[10,1000,322.37,"NJ"],[11,1000,400.61,"NY"],[12,1000,1000.0,"FL"],[13,1000,165.88,"NJ"],[14,1000,190.6,"TX"],[15,1000,1000.0,"OH"],[16,1000,213.72,"MI"],[17,1000,188.89,"MI"],[18,1000,237.41,"CA"],[19,1000,203.85,"CA"],[20,1000,1000.0,"NM"],[21,1000,66.39,"NY"],[22,1000,1000.0,"VA"],[23,1000,381.93,"AL"],[24,1000,147.91,"TX"],[25,1200,1200.0,"WA"],[26,1200,288.19,"CA"],[27,1200,516.99,"NV"],[28,1200,84.45,"NY"],[29,1200,677.96,"IN"],[30,1200,376.91,"OH"],[31,1200,1200.0,"NY"],[32,1200,79.62,"IL"],[33,1200,395.74,"AZ"],[34,1200,357.72,"MO"],[35,1275,1275.0,"MO"],[36,1275,285.26,"ID"],[37,1400,1400.0,"IL"],[38,1400,406.41,"PA"],[39,1400,1400.0,"OR"],[40,1450,575.78,"PA"],[41,1500,778.87,"AL"],[42,1500,328.7,"TX"],[43,1500,267.65,"MI"],[44,1500,112.59,"NY"],[45,1500,1500.0,"MO"],[46,1500,375.03,"TN"],[47,1500,654.03,"NJ"],[48,1500,555.59,"TX"],[49,1500,1500.0,"CA"],[50,1500,1411.83,"IL"],[51,1500,1500.0,"KS"],[52,1500,1057.36,"IN"],[53,1500,786.61,"CA"],[54,1500,99.54,"AL"],[55,1500,235.51,"TX"],[56,1500,821.76,"NY"],[57,1500,1500.0,"IL"],[58,1550,1550.0,"UT"],[59,1600,723.98,"NJ"],[60,1600,528.98,"IL"],[61,1600,526.27,"AL"],[62,1600,74.68,"NH"],[63,1600,767.82,"OH"],[64,1700,303.37,"MO"],[65,1700,1700.0,"AZ"],[66,1800,1800.0,"MO"],[67,1800,890.14,"TX"],[68,1800,653.3,"MS"],[69,1900,1900.0,"TX"],[70,1975,383.32,"TX"],[71,2000,1614.9,"NY"],[72,2000,356.89,"TN"],[73,2000,1052.86,"VA"],[74,2000,149.09,"LA"],[75,2000,99.06,"CA"],[76,2000,1163.66,"PA"],[77,2000,2000.0,"CA"],[78,2000,2000.0,"NJ"],[79,2000,1970.57,"TN"],[80,2000,143.94,"OH"],[81,2000,494.31,"TX"],[82,2000,494.3,"IL"],[83,2000,2000.0,"IN"],[84,2000,142.94,"NY"],[85,2000,1259.27,"TX"],[86,2000,1573.02,"MI"],[87,2000,141.94,"ID"],[88,2000,387.44,"TX"],[89,2000,337.46,"NH"],[90,2000,751.19,"MN"],[91,2000,488.7,"TX"],[92,2000,488.69,"KS"],[93,2000,387.19,"NE"],[94,2000,2000.0,"OK"],[95,2000,437.71,"IN"],[96,2000,855.79,"FL"],[97,2000,801.34,"OK"],[98,2000,2000.0,"MD"],[99,2000,2000.0,"TX"],[100,2000,2000.0,"FL"],[101,2000,2000.0,"NV"],[102,2000,381.19,"CA"],[103,2000,2000.0,"AK"],[104,2000,636.01,"TX"],[105,2000,377.71,"MI"],[106,2000,477.53,"MI"],[107,2000,377.71,"WY"],[108,2000,1006.5,"KY"],[109,2000,1066.88,"CA"],[110,2000,1066.88,"FL"],[111,2000,136.68,"WA"],[112,2000,1235.56,"GA"],[113,2000,938.5,"CA"],[114,2000,770.85,"LA"],[115,2000,2000.0,"FL"],[116,2000,1860.58,"FL"],[117,2000,927.56,"KY"],[118,2000,2000.0,"NY"],[119,2000,128.73,"VA"],[120,2000,2000.0,"CA"],[121,2100,696.95,"ID"],[122,2100,1025.23,"AL"],[123,2100,753.7,"TX"],[124,2200,53.34,"LA"],[125,2200,425.87,"SC"],[126,2200,364.98,"PA"],[127,2200,2200.0,"IL"],[128,2200,1330.82,"TX"],[129,2300,115.19,"LA"],[130,2300,172.63,"TN"],[131,2300,503.58,"CA"],[132,2300,2300.0,"OH"],[133,2300,912.91,"MS"],[134,2375,2375.0,"NC"],[135,2400,553.86,"NY"],[136,2400,599.94,"WI"],[137,2400,2400.0,"PA"],[138,2400,524.09,"NC"],[139,2400,1222.09,"NY"],[140,2400,525.24,"CA"],[141,2400,198.94,"SC"],[142,2400,2400.0,"WI
"],[143,2400,1500.51,"VA"],[144,2400,2400.0,"AL"],[145,2400,763.25,"TX"],[146,2400,1284.56,"NY"],[147,2400,453.21,"CA"],[148,2400,2400.0,"NY"],[149,2400,2400.0,"AL"],[150,2400,2400.0,"FL"],[151,2400,1271.64,"ME"],[152,2400,790.18,"FL"],[153,2500,1472.04,"PA"],[154,2500,2500.0,"MS"],[155,2500,638.54,"FL"],[156,2500,870.18,"CA"],[157,2500,2500.0,"VA"],[158,2500,568.52,"VA"],[159,2500,628.51,"NY"],[160,2500,1027.25,"AL"],[161,2500,2091.65,"TX"],[162,2500,1090.3,"NV"],[163,2500,1298.63,"VA"],[164,2500,1219.13,"FL"],[165,2500,872.09,"KY"],[166,2500,421.41,"OH"],[167,2500,1417.8,"MI"],[168,2500,1202.49,"IN"],[169,2500,538.98,"OH"],[170,2500,534.11,"SC"],[171,2500,2500.0,"IN"],[172,2500,453.46,"NY"],[173,2500,2500.0,"MN"],[174,2500,760.49,"VA"],[175,2525,1469.46,"NJ"],[176,2625,1064.43,"VT"],[177,2650,2650.0,"FL"],[178,2650,1352.52,"NY"],[179,2700,192.94,"VA"],[180,2700,2700.0,"IN"],[181,2700,179.12,"LA"],[182,2700,358.17,"AR"],[183,2725,189.65,"NV"],[184,2800,2800.0,"TX"],[185,2800,1629.23,"IN"],[186,2800,540.0,"PA"],[187,2800,1454.54,"KY"],[188,2800,1058.66,"NJ"],[189,2800,1365.41,"NY"],[190,2800,2800.0,"TX"],[191,2800,897.74,"WI"],[192,2800,391.11,"UT"],[193,2800,2800.0,"WI"],[194,2800,123.0,"CA"],[195,2825,303.63,"PA"],[196,2875,1568.71,"WI"],[197,2975,1910.92,"AL"],[198,2975,558.28,"MS"],[199,2975,188.74,"CA"],[200,3000,1875.57,"CA"],[201,3000,1215.9,"TX"],[202,3000,1207.98,"MN"],[203,3000,861.53,"TX"],[204,3000,1192.32,"SC"],[205,3000,1691.84,"CA"],[206,3000,3000.0,"KY"],[207,3000,232.18,"KY"],[208,3000,535.3,"WA"],[209,3000,690.32,"KS"],[210,3000,611.76,"TX"],[211,3000,225.61,"TX"],[212,3000,3000.0,"MI"],[213,3000,688.27,"TX"],[214,3000,723.51,"NJ"],[215,3000,3000.0,"MN"],[216,3000,223.61,"NY"],[217,3000,223.62,"TX"],[218,3000,684.26,"PA"],[219,3000,3000.0,"IL"],[220,3000,1660.37,"AZ"],[221,3000,527.01,"LA"],[222,3000,604.38,"TX"],[223,3000,604.38,"CT"],[224,3000,1573.66,"CA"],[225,3000,2812.87,"CA"],[226,3000,1240.99,"NY"],[227,3000,1397.45,"GA"],[228,3000,3000.0,"LA"],[229,3000,1907.47,"VA"],[230,3000,1734.87,"GA"],[231,3000,1066.87,"MS"],[232,3000,1726.01,"TX"],[233,3000,664.37,"MN"],[234,3000,3000.0,"WI"],[235,3000,3000.0,"OH"],[236,3000,741.05,"SD"],[237,3000,1464.51,"PA"],[238,3000,1053.93,"TX"],[239,3000,3000.0,"TN"],[240,3000,3000.0,"MD"],[241,3000,581.11,"NY"],[242,3000,650.64,"OH"],[243,3000,1045.73,"HI"],[244,3000,1289.44,"CA"],[245,3000,505.71,"RI"],[246,3000,966.85,"MD"],[247,3000,656.5,"PA"],[248,3000,3000.0,"CA"],[249,3000,1046.3,"AZ"],[250,3000,505.71,"AZ"],[251,3000,1623.81,"IN"],[252,3000,1874.04,"NJ"],[253,3000,559.63,"TX"],[254,3000,1283.69,"NE"],[255,3000,2249.47,"IL"],[256,3000,882.07,"NC"],[257,3000,3000.0,"NJ"],[258,3000,1701.17,"MO"],[259,3000,209.15,"MN"],[260,3000,209.15,"TN"],[261,3000,3000.0,"CO"],[262,3000,571.79,"NC"],[263,3000,3000.0,"FL"],[264,3000,953.96,"TX"],[265,3000,2801.21,"PA"],[266,3000,1272.72,"FL"],[267,3000,1689.94,"IL"],[268,3000,2800.23,"FL"],[269,3000,1517.58,"AZ"],[270,3000,3000.0,"CO"],[271,3000,492.9,"KY"],[272,3000,492.9,"TX"],[273,3000,492.9,"CT"],[274,3000,3000.0,"NV"],[275,3000,562.93,"IL"],[276,3000,637.04,"TX"],[277,3000,942.21,"WI"],[278,3000,562.93,"CO"],[279,3000,1175.6,"PA"],[280,3000,629.44,"NY"],[281,3000,1273.89,"NJ"],[282,3000,926.7,"CA"],[283,3000,3000.0,"NH"],[284,3000,1665.48,"WI"],[285,3000,3000.0,"MN"],[286,3000,1316.07,"IL"],[287,3000,470.96,"PA"],[288,3000,1064.1,"NJ"],[289,3000,3000.0,"TX"],[290,3000,1650.62,"MT"],[291,3000,1388.06,"FL"],[292,3000,3000.0,"PA"],[293,3000,1135.42,"NY"],[294,3000,3000.0,"NV"],[295,3000,526
.05,"IL"],[296,3000,811.66,"NJ"],[297,3000,1048.57,"CO"],[298,3000,364.07,"OK"],[299,3000,1065.43,"CA"],[300,3000,614.69,"TX"],[301,3025,1267.09,"GA"],[302,3025,773.57,"GA"],[303,3025,516.45,"GA"],[304,3025,214.69,"GA"],[305,3025,1300.11,"GA"],[306,3025,2066.13,"GA"],[307,3025,404.14,"TX"],[308,3025,891.96,"NY"],[309,3025,656.78,"GA"],[310,3025,637.2,"CA"],[311,3050,1007.02,"IL"],[312,3100,3100.0,"IL"],[313,3100,3100.0,"NY"],[314,3125,1053.94,"TN"],[315,3125,198.37,"CA"],[316,3150,645.23,"CA"],[317,3200,644.46,"MI"],[318,3200,3200.0,"NH"],[319,3200,3200.0,"PA"],[320,3200,1296.48,"MD"],[321,3200,460.12,"OH"],[322,3200,460.11,"TX"],[323,3200,763.92,"VA"],[324,3200,3200.0,"AZ"],[325,3200,1673.39,"NV"],[326,3200,172.61,"PA"],[327,3250,1302.32,"VA"],[328,3300,806.26,"TX"],[329,3300,3080.29,"NY"],[330,3350,252.31,"NJ"],[331,3350,1937.32,"AZ"],[332,3400,3400.0,"CA"],[333,3400,2062.45,"OH"],[334,3425,3425.0,"TX"],[335,3450,237.51,"TX"],[336,3500,3500.0,"NY"],[337,3500,1281.06,"WI"],[338,3500,1658.72,"GA"],[339,3500,3500.0,"MD"],[340,3500,3500.0,"NJ"],[341,3500,3500.0,"AL"],[342,3500,3500.0,"CO"],[343,3500,887.44,"PA"],[344,3500,3281.75,"IL"],[345,3500,1248.58,"OR"],[346,3500,3500.0,"CA"],[347,3500,168.43,"NC"],[348,3500,3500.0,"CA"],[349,3500,3500.0,"GA"],[350,3500,1229.56,"OH"],[351,3500,250.12,"IL"],[352,3500,248.21,"AR"],[353,3500,855.07,"CA"],[354,3500,853.31,"CA"],[355,3500,1677.48,"CO"],[356,3500,3500.0,"TX"],[357,3500,1679.58,"FL"],[358,3500,3266.87,"MO"],[359,3500,240.93,"AL"],[360,3500,3500.0,"MO"],[361,3500,291.53,"TX"],[362,3500,240.58,"NV"],[363,3500,1515.0,"TX"],[364,3500,3500.0,"CA"],[365,3500,1466.27,"NY"],[366,3500,1371.49,"NJ"],[367,3550,1952.11,"FL"],[368,3600,1529.58,"KY"],[369,3600,291.55,"FL"],[370,3600,640.38,"NC"],[371,3600,3600.0,"CO"],[372,3600,3600.0,"TN"],[373,3600,259.1,"OH"],[374,3600,917.23,"LA"],[375,3600,787.76,"CA"],[376,3600,3600.0,"MO"],[377,3600,3600.0,"MS"],[378,3600,966.83,"AZ"],[379,3600,1962.77,"FL"],[380,3600,3600.0,"TX"],[381,3600,2245.97,"RI"],[382,3600,776.05,"OH"],[383,3600,1727.7,"WA"],[384,3600,1758.34,"OH"],[385,3600,675.57,"PA"],[386,3600,675.57,"AL"],[387,3600,849.44,"GA"],[388,3600,3600.0,"TX"],[389,3600,1701.56,"NY"],[390,3600,3600.0,"CA"],[391,3600,580.05,"NV"],[392,3600,1018.75,"OK"],[393,3625,1239.53,"GA"],[394,3650,1504.95,"CA"],[395,3700,1817.82,"MD"],[396,3750,3750.0,"MN"],[397,3750,1953.35,"NY"],[398,3750,229.41,"IL"],[399,3775,1561.18,"NV"],[400,3800,1500.4,"NY"],[401,3800,2109.52,"OR"],[402,3800,3800.0,"CO"],[403,3800,1960.28,"TX"],[404,3825,3825.0,"VA"],[405,3850,967.97,"KS"],[406,3900,3900.0,"FL"],[407,3925,1604.31,"LA"],[408,4000,2436.76,"CT"],[409,4000,2442.62,"TX"],[410,4000,610.36,"FL"],[411,4000,819.0,"KY"],[412,4000,2580.85,"IL"],[413,4000,1451.57,"MS"],[414,4000,1341.04,"WA"],[415,4000,1771.82,"WI"],[416,4000,4000.0,"MI"],[417,4000,4000.0,"NJ"],[418,4000,818.5,"MN"],[419,4000,805.75,"IL"],[420,4000,1322.31,"IN"],[421,4000,1753.04,"IL"],[422,4000,1426.99,"AR"],[423,4000,1426.99,"CA"],[424,4000,4000.0,"CA"],[425,4000,4000.0,"GA"],[426,4000,1974.46,"NJ"],[427,4000,2199.4,"OH"],[428,4000,999.94,"NY"],[429,4000,1417.61,"TX"],[430,4000,1634.95,"WY"],[431,4000,287.6,"AL"],[432,4000,988.6,"CA"],[433,4000,1580.95,"WA"],[434,4000,1302.35,"OH"],[435,4000,2286.99,"NY"],[436,4000,2517.95,"CA"],[437,4000,880.53,"CA"],[438,4000,1018.67,"NJ"],[439,4000,775.26,"FL"],[440,4000,774.74,"TX"],[441,4000,4000.0,"OH"],[442,4000,875.78,"NJ"],[443,4000,771.36,"NY"],[444,4000,1289.08,"TX"],[445,4000,1289.08,"NY"],[446,4000,1289.08,"NJ"],[447,4000,2279.34,"
GA"],[448,4000,2279.34,"TX"],[449,4000,4000.0,"CA"],[450,4000,4000.0,"NY"],[451,4000,4000.0,"IN"],[452,4000,663.53,"CO"],[453,4000,4000.0,"OR"],[454,4000,4000.0,"TX"],[455,4000,1588.38,"NY"],[456,4000,4000.0,"NC"],[457,4000,2253.42,"FL"],[458,4000,1264.72,"VA"],[459,4000,3733.92,"OH"],[460,4000,1911.28,"GA"],[461,4000,657.18,"NC"],[462,4000,2864.13,"WI"],[463,4000,1687.27,"IL"],[464,4000,1365.43,"MI"],[465,4000,4000.0,"PA"],[466,4000,750.22,"GA"],[467,4000,2129.08,"CA"],[468,4000,1567.24,"NM"],[469,4000,943.86,"IL"],[470,4000,4000.0,"NC"],[471,4000,4000.0,"NE"],[472,4000,2924.04,"NJ"],[473,4000,4000.0,"TN"],[474,4000,269.13,"GA"],[475,4000,1448.55,"HI"],[476,4000,4000.0,"TN"],[477,4000,3725.68,"FL"],[478,4000,2220.47,"CA"],[479,4000,4000.0,"NJ"],[480,4000,1336.63,"NY"],[481,4000,731.6,"FL"],[482,4000,1649.35,"TX"],[483,4000,4000.0,"PA"],[484,4000,1320.58,"OR"],[485,4000,4000.0,"LA"],[486,4000,4000.0,"CT"],[487,4000,4000.0,"WV"],[488,4000,2188.15,"TX"],[489,4000,1204.71,"MI"],[490,4000,1408.18,"FL"],[491,4000,895.0,"OH"],[492,4000,1503.47,"AK"],[493,4000,1610.58,"NY"],[494,4000,507.64,"FL"],[495,4000,2195.32,"NY"],[496,4050,1531.34,"MO"],[497,4050,1392.3,"TX"],[498,4150,849.64,"OH"],[499,4150,288.83,"FL"],[500,4200,1100.48,"MA"],[501,4200,1784.51,"NJ"],[502,4200,4200.0,"CA"],[503,4200,1642.89,"FL"],[504,4200,2210.61,"TX"],[505,4200,1852.53,"MA"],[506,4200,2068.7,"NY"],[507,4200,2428.92,"NE"],[508,4200,1367.47,"CA"],[509,4200,708.38,"CO"],[510,4200,1682.97,"FL"],[511,4200,4200.0,"CA"],[512,4200,1016.05,"FL"],[513,4200,2121.16,"WA"],[514,4200,2601.53,"PA"],[515,4225,853.74,"IN"],[516,4250,1633.9,"NY"],[517,4300,1704.48,"WI"],[518,4375,4375.0,"TN"],[519,4400,4400.0,"NH"],[520,4450,820.26,"AR"],[521,4450,1699.49,"CA"],[522,4450,1625.34,"GA"],[523,4475,1623.23,"VT"],[524,4475,1785.86,"RI"],[525,4500,1946.02,"AZ"],[526,4500,4500.0,"TX"],[527,4500,339.06,"OH"],[528,4500,4500.0,"MN"],[529,4500,1580.82,"CA"],[530,4500,4500.0,"OH"],[531,4500,522.12,"MO"],[532,4500,2208.2,"CA"],[533,4500,2336.35,"NC"],[534,4500,4500.0,"AL"],[535,4500,1304.97,"TN"],[536,4525,1054.09,"OH"],[537,4550,880.72,"MS"],[538,4550,1972.49,"TX"],[539,4550,985.29,"NE"],[540,4575,1521.23,"MO"],[541,4675,2577.73,"KY"],[542,4675,2539.53,"OH"],[543,4700,1279.09,"TX"],[544,4725,2386.11,"CA"],[545,4750,2020.38,"MI"],[546,4750,1952.18,"AL"],[547,4800,4800.0,"NJ"],[548,4800,2962.43,"DC"],[549,4800,889.53,"MO"],[550,4800,874.52,"CA"],[551,4800,3952.49,"CO"],[552,4800,1907.7,"CA"],[553,4800,371.49,"SC"],[554,4800,2039.44,"OH"],[555,4800,362.4,"OH"],[556,4800,2399.26,"RI"],[557,4800,2797.95,"IL"],[558,4800,964.92,"NY"],[559,4800,966.93,"IL"],[560,4800,1091.56,"MA"],[561,4800,4800.0,"MA"],[562,4800,2117.19,"NY"],[563,4800,1961.94,"TX"],[564,4800,227.65,"NC"],[565,4800,3038.23,"IL"],[566,4800,1174.08,"AL"],[567,4800,1546.93,"AZ"],[568,4800,809.12,"TX"],[569,4800,2735.11,"MD"],[570,4800,2873.64,"MI"],[571,4800,2721.97,"TX"],[572,4800,2443.23,"GA"],[573,4800,334.63,"FL"],[574,4800,4800.0,"TX"],[575,4800,900.67,"NV"],[576,4800,1625.57,"FL"],[577,4800,658.8,"TX"],[578,4800,213.97,"MD"],[579,4800,2251.77,"MO"],[580,4800,4800.0,"UT"],[581,4825,2405.54,"MI"],[582,4825,1422.59,"CA"],[583,4900,2002.89,"NJ"],[584,4925,2627.0,"NJ"],[585,4925,1162.06,"TX"],[586,4950,958.17,"OH"],[587,4975,325.3,"AZ"],[588,4975,1869.83,"CA"],[589,5000,2003.58,"CA"],[590,5000,1137.04,"NC"],[591,5000,1140.09,"FL"],[592,5000,5000.0,"TX"],[593,5000,386.95,"CA"],[594,5000,2819.66,"NM"],[595,5000,1043.39,"TX"],[596,5000,5000.0,"CA"],[597,5000,2262.19,"CA"],[598,5000,5000.0,"CA"],
[599,5000,5000.0,"NY"],[600,5000,2517.94,"NH"],[601,5000,2517.94,"MA"],[602,5000,2103.01,"CO"],[603,5000,5000.0,"FL"],[604,5000,2094.18,"GA"],[605,5000,892.19,"VA"],[606,5000,1022.55,"FL"],[607,5000,3515.81,"MO"],[608,5000,250.41,"CA"],[609,5000,889.38,"PA"],[610,5000,889.38,"FL"],[611,5000,5000.0,"IL"],[612,5000,5000.0,"AZ"],[613,5000,3471.91,"NY"],[614,5000,3033.67,"TX"],[615,5000,2925.85,"NY"],[616,5000,2925.85,"WV"],[617,5000,1814.35,"CA"],[618,5000,1814.35,"IL"],[619,5000,1814.35,"CA"],[620,5000,1814.35,"CA"],[621,5000,1147.06,"NY"],[622,5000,5000.0,"NE"],[623,5000,5000.0,"GA"],[624,5000,5000.0,"DE"],[625,5000,2914.11,"TN"],[626,5000,2077.42,"OK"],[627,5000,1669.45,"NV"],[628,5000,5000.0,"CT"],[629,5000,2909.17,"FL"],[630,5000,2909.17,"TX"],[631,5000,1400.26,"SC"],[632,5000,2767.29,"FL"],[633,5000,5000.0,"WA"],[634,5000,1231.62,"PA"],[635,5000,878.27,"WA"],[636,5000,5000.0,"CA"],[637,5000,1797.09,"NY"],[638,5000,1797.09,"TX"],[639,5000,2068.43,"TX"],[640,5000,2480.7,"CO"],[641,5000,5000.0,"CA"],[642,5000,1652.78,"FL"],[643,5000,1804.16,"NE"],[644,5000,1257.01,"NY"],[645,5000,1652.78,"OH"],[646,5000,2054.5,"MA"],[647,5000,5000.0,"AZ"],[648,5000,2054.5,"IN"],[649,5000,2191.12,"WA"],[650,5000,2054.5,"MI"],[651,5000,2054.5,"NY"],[652,5000,1783.81,"FL"],[653,5000,2468.08,"MA"],[654,5000,5000.0,"CA"],[655,5000,3068.64,"FL"],[656,5000,2749.25,"NY"],[657,5000,3177.23,"OH"],[658,5000,3428.76,"IL"],[659,5000,5000.0,"FL"],[660,5000,1644.51,"LA"],[661,5000,3648.47,"NY"],[662,5000,1778.12,"KY"],[663,5000,5000.0,"AZ"],[664,5000,1644.51,"IN"],[665,5000,2043.68,"MI"],[666,5000,2062.67,"NY"],[667,5000,1773.09,"NY"],[668,5000,2180.41,"IN"],[669,5000,2180.26,"CA"],[670,5000,2180.41,"NH"],[671,5000,2597.26,"SC"],[672,5000,2597.26,"OH"],[673,5000,2943.48,"IL"],[674,5000,1372.84,"FL"],[675,5000,2734.31,"KY"],[676,5000,239.81,"LA"],[677,5000,359.85,"WI"],[678,5000,359.85,"MO"],[679,5000,481.39,"OK"],[680,5000,5000.0,"CA"],[681,5000,5000.0,"MD"],[682,5000,359.47,"OH"],[683,5000,979.85,"FL"],[684,5000,1967.9,"NC"],[685,5000,5000.0,"NV"],[686,5000,1627.8,"CO"],[687,5000,5000.0,"WA"],[688,5000,2025.76,"PA"],[689,5000,2025.76,"CT"],[690,5000,1756.45,"WA"],[691,5000,2438.26,"NY"],[692,5000,2438.26,"OH"],[693,5000,357.32,"TX"],[694,5000,4677.11,"MI"],[695,5000,3148.05,"VA"],[696,5000,2858.61,"VA"],[697,5000,1751.91,"NM"],[698,5000,1619.52,"WI"],[699,5000,1751.27,"FL"],[700,5000,3282.44,"CA"],[701,5000,1228.57,"IN"],[702,5000,5000.0,"CA"],[703,5000,5000.0,"NC"],[704,5000,843.86,"MD"],[705,5000,843.86,"CA"],[706,5000,5000.0,"MD"],[707,5000,1222.95,"NY"],[708,5000,969.03,"MN"],[709,5000,843.33,"CA"],[710,5000,843.33,"IN"],[711,5000,5000.0,"PA"],[712,5000,843.33,"NY"],[713,5000,1094.75,"MI"],[714,5000,843.33,"IN"],[715,5000,968.44,"PA"],[716,5000,2012.69,"NV"],[717,5000,5000.0,"AZ"],[718,5000,719.1,"MI"],[719,5000,2012.69,"TX"],[720,5000,5000.0,"CA"],[721,5000,1744.04,"NE"],[722,5000,1221.74,"NC"],[723,5000,474.71,"TX"],[724,5000,842.41,"MI"],[725,5000,967.85,"WY"],[726,5000,967.85,"GA"],[727,5000,5000.0,"LA"],[728,5000,1180.77,"MD"],[729,5000,718.93,"CA"],[730,5000,1094.1,"IL"],[731,5000,967.86,"NY"],[732,5000,842.81,"NY"],[733,5000,842.8,"NJ"],[734,5000,5000.0,"WI"],[735,5000,2706.19,"NV"],[736,5000,1872.56,"IL"],[737,5000,4674.41,"NY"],[738,5000,5000.0,"CA"],[739,5000,2415.79,"TX"],[740,5000,5000.0,"IL"],[741,5000,2003.37,"MD"],[742,5000,2003.37,"IL"],[743,5000,1210.36,"NY"],[744,5000,5000.0,"NY"],[745,5000,2692.22,"IL"],[746,5000,2834.25,"PA"],[747,5000,2835.36,"GA"],[748,5000,2835.36,"PA"],[749,5000,5000.0,"WI"],[
750,5000,5000.0,"NY"],[751,5000,3408.03,"FL"],[752,5000,231.16,"CA"],[753,5000,5000.0,"NJ"],[754,5000,3119.24,"VA"],[755,5000,2684.95,"NV"],[756,5000,1204.08,"PA"],[757,5000,5000.0,"NY"],[758,5000,1204.07,"CA"],[759,5000,829.41,"FL"],[760,5000,952.98,"NJ"],[761,5000,829.41,"IN"],[762,5000,465.43,"NY"],[763,5000,1721.61,"PA"],[764,5000,585.42,"CA"],[765,5000,1589.98,"AZ"],[766,5000,1589.98,"CA"],[767,5000,5000.0,"CO"],[768,5000,1985.05,"GA"],[769,5000,2121.29,"LA"],[770,5000,1746.82,"TX"],[771,5000,5000.0,"FL"],[772,5000,2676.01,"DC"],[773,5000,2534.18,"NY"],[774,5000,2816.65,"VA"],[775,5000,3108.3,"CA"],[776,5000,2673.01,"TN"],[777,5000,3406.19,"CA"],[778,5000,1193.66,"NC"],[779,5000,944.14,"CA"],[780,5000,5000.0,"WA"],[781,5000,944.14,"FL"],[782,5000,1578.57,"NJ"],[783,5000,700.13,"WI"],[784,5000,344.2,"WA"],[785,5000,2108.09,"NV"],[786,5000,5000.0,"MS"],[787,5000,2108.9,"CA"],[788,5000,2666.97,"HI"],[789,5000,5000.0,"NY"],[790,5000,2384.78,"OH"],[791,5000,5000.0,"VA"],[792,5000,1186.72,"NJ"],[793,5000,1186.72,"AK"],[794,5000,5000.0,"PA"],[795,5000,1568.22,"NY"],[796,5000,816.15,"NJ"],[797,5000,816.15,"NY"],[798,5000,1890.65,"MD"],[799,5000,3097.08,"TN"],[800,5000,5000.0,"CT"],[801,5000,1956.57,"TX"],[802,5000,5000.0,"SC"],[803,5000,4496.36,"NY"],[804,5000,1552.32,"TX"],[805,5000,805.65,"NY"],[806,5000,686.23,"GA"],[807,5000,336.41,"NY"],[808,5000,1943.68,"MO"],[809,5000,1944.13,"CA"],[810,5000,1944.12,"CA"],[811,5000,1544.45,"WA"],[812,5000,1544.45,"FL"],[813,5000,5000.0,"WI"],[814,5000,795.24,"NC"],[815,5000,1159.28,"OR"],[816,5000,1786.24,"NY"],[817,5000,5000.0,"NM"],[818,5000,1650.88,"CA"],[819,5000,5000.0,"NM"],[820,5000,213.58,"CA"],[821,5000,5000.0,"OH"],[822,5000,2026.3,"TX"],[823,5000,1877.86,"CA"],[824,5000,754.43,"NY"],[825,5000,5000.0,"CA"],[826,5000,287.7,"CA"],[827,5050,1822.02,"FL"],[828,5050,1761.5,"FL"],[829,5050,1088.66,"GA"],[830,5100,1998.39,"NY"],[831,5100,3238.26,"NY"],[832,5100,833.81,"IL"],[833,5125,380.54,"WI"],[834,5125,5125.0,"GA"],[835,5200,4872.59,"CT"],[836,5200,4867.83,"CT"],[837,5200,1137.92,"CA"],[838,5200,3244.04,"MI"],[839,5200,5200.0,"FL"],[840,5200,109.57,"AL"],[841,5275,2473.35,"CA"],[842,5275,1726.48,"AR"],[843,5300,1246.97,"NC"],[844,5300,400.6,"TX"],[845,5300,853.23,"OH"],[846,5325,1064.02,"NJ"],[847,5350,5350.0,"MD"],[848,5375,1141.34,"OK"],[849,5375,971.18,"WA"],[850,5375,201.62,"WI"],[851,5400,3197.27,"VA"],[852,5400,1821.23,"TN"],[853,5400,5400.0,"NJ"],[854,5400,383.23,"NC"],[855,5400,910.23,"CA"],[856,5400,639.58,"MD"],[857,5400,2551.69,"IL"],[858,5425,2217.67,"TX"],[859,5450,3505.54,"IL"],[860,5450,2015.06,"IL"],[861,5500,2464.58,"VA"],[862,5500,2056.83,"NC"],[863,5500,2488.45,"VA"],[864,5500,5500.0,"NV"],[865,5500,3062.49,"PA"],[866,5500,2740.67,"IL"],[867,5500,5500.0,"NY"],[868,5500,1351.46,"NJ"],[869,5500,1772.5,"OH"],[870,5500,2333.51,"DE"],[871,5500,5500.0,"TX"],[872,5500,2940.62,"AR"],[873,5500,3418.5,"NC"],[874,5500,1839.18,"AR"],[875,5525,5525.0,"SD"],[876,5550,2268.61,"NY"],[877,5575,348.61,"CA"],[878,5600,5600.0,"VA"],[879,5600,2300.94,"OH"],[880,5600,1997.79,"CA"],[881,5600,2764.1,"IL"],[882,5600,5600.0,"MN"],[883,5600,1986.34,"NE"],[884,5600,5600.0,"MD"],[885,5600,5600.0,"HI"],[886,5600,1961.29,"MN"],[887,5600,5600.0,"OH"],[888,5600,928.93,"NY"],[889,5600,928.93,"MN"],[890,5600,5600.0,"WA"],[891,5600,1862.16,"NY"],[892,5600,5212.88,"MD"],[893,5600,5600.0,"VA"],[894,5600,2094.18,"NY"],[895,5625,1055.48,"TN"],[896,5700,1262.21,"NY"],[897,5700,5700.0,"GA"],[898,5700,2350.24,"NY"],[899,5700,1653.81,"WI"],[900,5750,3325.17,"MN"],[901,58
25,769.02,"GA"],[902,5825,1877.16,"TX"],[903,5825,5428.77,"OH"],[904,5825,1177.99,"CO"],[905,5850,2807.36,"MN"],[906,5925,2613.06,"NJ"],[907,5925,1319.57,"PA"],[908,5950,2172.28,"PA"],[909,6000,2503.88,"OH"],[910,6000,2452.77,"SC"],[911,6000,2252.11,"VA"],[912,6000,1111.87,"CA"],[913,6000,6000.0,"FL"],[914,6000,1183.8,"NY"],[915,6000,2234.24,"IL"],[916,6000,1023.4,"SC"],[917,6000,1252.02,"SC"],[918,6000,2549.14,"CA"],[919,6000,1252.02,"MI"],[920,6000,6000.0,"OR"],[921,6000,2549.14,"CO"],[922,6000,2549.14,"TX"],[923,6000,2549.14,"CA"],[924,6000,2549.13,"OH"],[925,6000,6000.0,"VA"],[926,6000,5637.76,"CA"],[927,6000,3532.84,"IL"],[928,6000,4961.58,"NY"],[929,6000,2523.52,"CO"],[930,6000,2688.58,"IL"],[931,6000,3570.23,"CA"],[932,6000,6000.0,"TX"],[933,6000,1070.59,"NY"],[934,6000,3010.27,"TX"],[935,6000,6000.0,"CA"],[936,6000,1067.27,"CA"],[937,6000,1538.61,"CT"],[938,6000,2506.68,"IL"],[939,6000,6000.0,"OH"],[940,6000,2173.8,"NY"],[941,6000,2177.28,"VA"],[942,6000,2503.88,"LA"],[943,6000,2177.28,"MI"],[944,6000,2668.69,"AZ"],[945,6000,1219.68,"WA"],[946,6000,1376.44,"NY"],[947,6000,1532.52,"MI"],[948,6000,904.37,"OH"],[949,6000,3840.98,"NJ"],[950,6000,3840.98,"MA"],[951,6000,2657.63,"GA"],[952,6000,2989.66,"FL"],[953,6000,447.23,"PA"],[954,6000,3259.17,"GA"],[955,6000,2003.45,"NY"],[956,6000,2164.84,"TX"],[957,6000,447.12,"PA"],[958,6000,6000.0,"NJ"],[959,6000,447.11,"MN"],[960,6000,1208.67,"AZ"],[961,6000,1364.46,"FL"],[962,6000,6000.0,"AR"],[963,6000,1515.27,"WA"],[964,6000,2201.19,"NV"],[965,6000,2629.57,"NJ"],[966,6000,2629.57,"CA"],[967,6000,2465.3,"IL"],[968,6000,2465.3,"OR"],[969,6000,2465.3,"NJ"],[970,6000,2629.57,"KY"],[971,6000,3450.57,"NY"],[972,6000,2961.69,"NY"],[973,6000,3299.1,"MI"],[974,6000,3299.1,"CA"],[975,6000,3814.95,"OR"],[976,6000,2031.1,"NJ"],[977,6000,2616.49,"NY"],[978,6000,2128.22,"CA"],[979,6000,2947.74,"CA"],[980,6000,2947.74,"NY"],[981,6000,3111.55,"CA"],[982,6000,6000.0,"FL"],[983,6000,1322.03,"NY"],[984,6000,1482.8,"MT"],[985,6000,2594.63,"FL"],[986,6000,2107.71,"NY"],[987,6000,428.76,"CA"],[988,6000,1943.5,"FL"],[989,6000,6000.0,"IL"],[990,6000,1012.67,"NY"],[991,6000,1314.53,"UT"],[992,6000,1162.15,"DE"],[993,6000,1466.73,"OH"],[994,6000,1012.01,"CA"],[995,6000,1012.01,"AZ"],[996,6000,1466.73,"AZ"],[997,6000,2414.39,"CA"],[998,6000,2578.88,"OK"],[999,6000,2415.37,"FL"]],"plotOptions":{"displayType":"table","customPlotOptions":{},"pivotColumns":null,"pivotAggregation":null,"xColumns":null,"yColumns":null},"columnCustomDisplayInfos":{},"aggType":"","isJsonSchema":true,"removedWidgets":[],"aggSchema":[],"schema":[{"name":"loan_id","type":"\"long\"","metadata":"{}"},{"name":"funded_amnt","type":"\"integer\"","metadata":"{}"},{"name":"paid_amnt","type":"\"double\"","metadata":"{}"},{"name":"addr_state","type":"\"string\"","metadata":"{}"}],"aggError":"","aggData":[],"addedWidgets":{},"metadata":{},"dbfsResultPath":null,"type":"table","aggOverflow":false,"aggSeriesLimitReached":false,"arguments":{}}},"output_type":"display_data","data":{"text/html":["
[Rendered HTML table output omitted: the first 1000 rows of the loans Delta table (columns loan_id, funded_amnt, paid_amnt, addr_state), duplicating the JSON data array above.]
"]},"transient":null}],"execution_count":0},{"cell_type":"code","source":["iot_devices = spark.read\\\n .format(\"delta\")\\\n .load(\"abfss://test-container@rrdemostorageacc.dfs.core.windows.net/iot-devices\")\n\ndisplay(iot_devices)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"086f47dd-9611-4f69-842c-56877c8cf00a"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"
","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"html","arguments":{}}},"output_type":"display_data","data":{"text/html":["\n
"]},"transient":null},{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"data":"
---------------------------------------------------------------------------\nAnalysisException Traceback (most recent call last)\n<command-4023870332312377> in <module>\n----> 1 iot_devices = spark.read\\\n 2 .format("delta")\\\n 3 .load("abfss://test-container@rrdemostorageacc.dfs.core.windows.net/iot-devices")\n 4 \n 5 display(iot_devices)\n\n/databricks/spark/python/pyspark/sql/readwriter.py in load(self, path, format, schema, **options)\n 202 self.options(**options)\n 203 if isinstance(path, str):\n--> 204 return self._df(self._jreader.load(path))\n 205 elif path is not None:\n 206 if type(path) != list:\n\n/databricks/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py in __call__(self, *args)\n 1302 \n 1303 answer = self.gateway_client.send_command(command)\n-> 1304 return_value = get_return_value(\n 1305 answer, self.gateway_client, self.target_id, self.name)\n 1306 \n\n/databricks/spark/python/pyspark/sql/utils.py in deco(*a, **kw)\n 121 # Hide where the exception came from that shows a non-Pythonic\n 122 # JVM exception message.\n--> 123 raise converted from None\n 124 else:\n 125 raise\n\nAnalysisException: Incompatible format detected.\n\nYou are trying to read from `abfss://test-container@rrdemostorageacc.dfs.core.windows.net/iot-devices` using Databricks Delta, but there is no\ntransaction log present. Check the upstream job to make sure that it is writing\nusing format("delta") and that you are trying to read from the table base path.\n\nTo disable this check, SET spark.databricks.delta.formatCheck.enabled=false\nTo learn more about Delta, see https://docs.microsoft.com/azure/databricks/delta/index\n
","errorSummary":"AnalysisException: Incompatible format detected.","metadata":{},"errorTraceType":"html","type":"ipynbError","arguments":{}}},"output_type":"display_data","data":{"text/html":["\n
---------------------------------------------------------------------------\nAnalysisException Traceback (most recent call last)\n<command-4023870332312377> in <module>\n----> 1 iot_devices = spark.read\\\n 2 .format("delta")\\\n 3 .load("abfss://test-container@rrdemostorageacc.dfs.core.windows.net/iot-devices")\n 4 \n 5 display(iot_devices)\n\n/databricks/spark/python/pyspark/sql/readwriter.py in load(self, path, format, schema, **options)\n 202 self.options(**options)\n 203 if isinstance(path, str):\n--> 204 return self._df(self._jreader.load(path))\n 205 elif path is not None:\n 206 if type(path) != list:\n\n/databricks/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py in __call__(self, *args)\n 1302 \n 1303 answer = self.gateway_client.send_command(command)\n-> 1304 return_value = get_return_value(\n 1305 answer, self.gateway_client, self.target_id, self.name)\n 1306 \n\n/databricks/spark/python/pyspark/sql/utils.py in deco(*a, **kw)\n 121 # Hide where the exception came from that shows a non-Pythonic\n 122 # JVM exception message.\n--> 123 raise converted from None\n 124 else:\n 125 raise\n\nAnalysisException: Incompatible format detected.\n\nYou are trying to read from `abfss://test-container@rrdemostorageacc.dfs.core.windows.net/iot-devices` using Databricks Delta, but there is no\ntransaction log present. Check the upstream job to make sure that it is writing\nusing format("delta") and that you are trying to read from the table base path.\n\nTo disable this check, SET spark.databricks.delta.formatCheck.enabled=false\nTo learn more about Delta, see https://docs.microsoft.com/azure/databricks/delta/index\n
"]},"transient":null}],"execution_count":0}],"metadata":{"application/vnd.databricks.v1+notebook":{"notebookName":"pattern 4- cluster scoped principals - loans","dashboards":[],"notebookMetadata":{"pythonIndentUnit":4},"language":"python","widgets":{},"notebookOrigID":4023870332312374}},"nbformat":4,"nbformat_minor":0} 2 | -------------------------------------------------------------------------------- /tutorials/pattern_6_Databricks_Table_Access_Control/Pattern6.md: -------------------------------------------------------------------------------- 1 | # Tutorial for Pattern 6: Databricks Tables Access Control 2 | 3 | **Summary:** 4 | 5 | This document provides links to a tutorial on implementing Pattern 6: Databricks Tables Access Control. 6 | It is highly recommended to check [Databricks SQL security model and data access overview](https://docs.microsoft.com/en-us/azure/databricks/sql/user/security/data-access-overview) to gain better understanding of this approach. 7 | 8 | **Versions:** 9 | 10 | | **Name** | **Title** | **Notes** | **Date** | 11 | | --- | --- | --- | --- | 12 | | Anil Sener | Microsoft Cloud Solution Architect – Data & AI | Original | 01 December 2021 | 13 | | | | | | 14 | 15 | # Contents 16 | 17 | [Pre-requisites](#Pre-requisites) 18 | 19 | [Tutorial Steps](#Tutorial-Steps) 20 | 21 | [License/Terms of Use](#License/Terms-of-Use) 22 | 23 | ## Pre-requisites 24 | 25 | This tutorial requires the completion of the steps in [Connecting securely to ADLS from ADB](../../Readme.md#connecting-securely-to-adls-from-adb) section. 26 | 27 | This tutorial requires the completion of the steps in the [preparation](../preparation/Readme.md) section. 28 | 29 | This tutorial requires a premium Databricks Workspace. 30 | 31 | ## Tutorial Steps 32 | 1. Navigate to the premium Azure Databricks Workspace > Overview on Azure Portal and click Launch Workspace button, choose and admin user to login. When Azure Databricks Workspace is displayed, navigate to Compute and then create a High Concurrency cluster, enable Table Access Control for this cluster and set the Spark Configuration as below. Set the permissions to allow LoanGroup and IoTDevicesGroup to attach this cluster: 33 | 34 |

35 | 36 |

37 | 38 |

39 | 40 |

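For reference, enabling Table Access Control on a High Concurrency cluster corresponds to Spark properties along the following lines. This is a sketch based on the standard Azure Databricks table access control settings rather than an excerpt from this tutorial's screenshots; ticking "Enable table access control and only allow Python and SQL commands" in the cluster UI normally applies the equivalent configuration for you:

```
spark.databricks.acl.dfAclsEnabled true
spark.databricks.repl.allowedLanguages python,sql
```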
41 | 42 | 2. Navigate to SQL in the Databricks menu: 43 |

44 | 45 |

46 | 47 | 3. Navigate to Configure data access > SQL Endpoint Settings and [add a Service Principal](https://docs.microsoft.com/en-us/azure/databricks/sql/admin/data-access-configuration#storage-access) as follows. Please remember to replace `<storage-account>`, `<application-id>`, `<secret-scope>`, `<secret-key>` and `<tenant-id>` (your Azure Tenant ID). You can use the `<application-id>`, `<secret-scope>` and `<secret-key>` from the App Registration and ADB Secrets in the [Connecting securely to ADLS from ADB](../../Readme.md#connecting-securely-to-adls-from-adb) section (a filled-in example follows the screenshots below): 48 | 49 | ``` 50 | spark.hadoop.fs.azure.account.auth.type.<storage-account>.dfs.core.windows.net OAuth 51 | spark.hadoop.fs.azure.account.oauth.provider.type.<storage-account>.dfs.core.windows.net org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider 52 | spark.hadoop.fs.azure.account.oauth2.client.id.<storage-account>.dfs.core.windows.net <application-id> 53 | spark.hadoop.fs.azure.account.oauth2.client.secret.<storage-account>.dfs.core.windows.net {{secrets/<secret-scope>/<secret-key>}} 54 | spark.hadoop.fs.azure.account.oauth2.client.endpoint.<storage-account>.dfs.core.windows.net https://login.microsoftonline.com/<tenant-id>/oauth2/token 55 | ``` 56 | 57 |

58 | 59 |

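As an illustration only, here is what the data access configuration would look like with the example values that appear in this repository's preparation notebook (storage account rrdemostorageacc, secret scope rr-demo-secret-scope, secret key rr-demo-secret-key, and the sample application and tenant IDs); substitute your own values:

```
spark.hadoop.fs.azure.account.auth.type.rrdemostorageacc.dfs.core.windows.net OAuth
spark.hadoop.fs.azure.account.oauth.provider.type.rrdemostorageacc.dfs.core.windows.net org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider
spark.hadoop.fs.azure.account.oauth2.client.id.rrdemostorageacc.dfs.core.windows.net 15317507-66ff-48e8-89ac-fca37c38f38e
spark.hadoop.fs.azure.account.oauth2.client.secret.rrdemostorageacc.dfs.core.windows.net {{secrets/rr-demo-secret-scope/rr-demo-secret-key}}
spark.hadoop.fs.azure.account.oauth2.client.endpoint.rrdemostorageacc.dfs.core.windows.net https://login.microsoftonline.com/8a7b0420-a9f1-4cb9-8567-8a5b02d42bd2/oauth2/token
```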
60 | 61 | 62 | 4. Navigate back to the main SQL screen, then go to Review SQL endpoints on the home screen (or to SQL Endpoints in the Databricks menu on the left) and click the Create SQL Endpoint button to create an SQL endpoint with the following settings. 63 |

64 | 65 |

66 | 67 | 5. Start the SQL endpoint by clicking the Start button next to the SQL endpoint that you have created. It might take a few minutes for the SQL endpoint to move to the Running state: 68 |

69 | 70 |

71 | 72 |

73 | 74 |

75 | 76 | 77 | 6. When the SQL endpoint has started, navigate to Queries in the Databricks menu and click the Create Query button: 78 |

79 | 80 |

81 | 82 | 7. Select the SQL endpoint that you created and started in the new Query Editor: 83 |

84 | 85 |

86 | 87 | 8. Copy and select the commands below in the Query Editor and then click the Run Selected button to execute them (a sketch for verifying the resulting grants follows the screenshots below): 88 | ``` 89 | GRANT USAGE 90 | ON DATABASE testdb 91 | TO 92 | `LoanGroup`; 93 | 94 | GRANT USAGE 95 | ON DATABASE testdb 96 | TO 97 | `IoTDevicesGroup`; 98 | 99 | GRANT READ_METADATA, `SELECT`, MODIFY 100 | ON TABLE testdb.iot_devices 101 | TO 102 | `IoTDevicesGroup`; 103 | 104 | 105 | GRANT READ_METADATA, `SELECT`, MODIFY 106 | ON TABLE testdb.loans 107 | TO 108 | `LoanGroup`; 109 | 110 | DENY READ_METADATA, `SELECT`, MODIFY 111 | ON TABLE testdb.iot_devices 112 | TO 113 | `LoanGroup`; 114 | 115 | DENY READ_METADATA, `SELECT`, MODIFY 116 | ON TABLE testdb.loans 117 | TO 118 | `IoTDevicesGroup`; 119 | ``` 120 | 121 |

122 | 123 |

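To verify that the grants and denies landed as intended, you can run a quick check in the same query editor. This is a sketch using the table access control SHOW GRANT command (the principal is optional; adjust the object names if yours differ):

```
SHOW GRANT `IoTDevicesGroup` ON TABLE testdb.iot_devices;
SHOW GRANT `LoanGroup` ON TABLE testdb.loans;
SHOW GRANT ON DATABASE testdb;
```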
124 | 125 | 9. (Optional) Click the Grant Permissions item on the pop-up menu near the Databricks menu, find `testdb` and check its permissions. Check the permissions for the tables as well: 126 |

127 | 128 |

129 | 130 |

131 | 132 |

133 | 134 |

135 | 136 |

137 | 138 | 139 | 10. Navigate to the premium Azure Databricks Workspace > Overview on Azure Portal and click the Launch Workspace button, choosing the TestUser1 user to log in. When the Azure Databricks Workspace is displayed, navigate to Workspace, upload the [pattern6-table-access-control.ipynb](notebooks/testuser1/pattern6-table-access-control.ipynb) notebook to the Databricks Workspace, open the notebook, attach and start the cluster created in step 1 and then run all cells (a sketch of the kind of checks the notebook performs follows the screenshots below): 140 |

141 | 142 |

143 | 144 |

145 | 146 |

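A minimal sketch of the kind of checks the testuser1 notebook performs, assuming the storage account, container and table names used elsewhere in this repository (the actual notebook may differ in detail):

```
# Direct file path access should now fail, because the data is only exposed through the metastore.
try:
    spark.read.format("delta").load(
        "abfss://test-container@rrdemostorageacc.dfs.core.windows.net/iot-devices")
except Exception as e:
    print("Direct path access denied as expected:", type(e).__name__)

# Table access is governed by Databricks table access control.
display(spark.table("testdb.iot_devices"))   # granted to IoTDevicesGroup, should succeed

try:
    spark.table("testdb.loans").count()       # denied to IoTDevicesGroup, should fail
except Exception as e:
    print("Access to testdb.loans denied as expected:", type(e).__name__)
```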
147 | 148 | RESULT: Neither of the file paths is directly accessible anymore. IoTDevicesGroup Databricks group users can only access the `testdb.iot_devices` table, and the user receives an explicit privilege exception for the `testdb.loans` table. These permissions are enforced by the privileges granted and denied through Databricks table access control. 149 | 150 | 151 | 11. Navigate to the premium Azure Databricks Workspace > Overview on Azure Portal and click the Launch Workspace button, choosing the TestUser2 user to log in. When the Azure Databricks Workspace is displayed, navigate to Workspace, upload the [pattern6-table-access-control.ipynb](notebooks/testuser2/pattern6-table-access-control.ipynb) notebook to the Databricks Workspace, open the notebook, attach and start the cluster created in step 1 and then run all cells: 152 |

153 | 154 |

155 | 156 |

157 | 158 |

159 | 160 | RESULT: Neither of the file paths is directly accessible anymore. LoanGroup Databricks group users can only access the `testdb.loans` table, and the user receives an explicit privilege exception for the `testdb.iot_devices` table. These permissions are enforced by the privileges granted and denied through Databricks table access control. 161 | 162 | 163 | ## License/Terms of Use 164 | 165 | This is a free white paper released into the public domain. 166 | 167 | Anyone is free to use or distribute this white paper, for any purpose, 168 | commercial or non-commercial, and by any means. 169 | 170 | THE WHITE PAPER IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, 171 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 172 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 173 | 174 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 175 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 176 | FROM, OUT OF OR IN CONNECTION WITH THE WHITE PAPER. 177 | 178 | -------------------------------------------------------------------------------- /tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_1.png -------------------------------------------------------------------------------- /tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_10.png -------------------------------------------------------------------------------- /tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_10_a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_10_a.png -------------------------------------------------------------------------------- /tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_1_a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_1_a.png -------------------------------------------------------------------------------- /tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_2.png -------------------------------------------------------------------------------- /tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_3.png -------------------------------------------------------------------------------- /tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_4.png -------------------------------------------------------------------------------- /tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_5.png -------------------------------------------------------------------------------- /tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_5_a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_5_a.png -------------------------------------------------------------------------------- /tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_6.png -------------------------------------------------------------------------------- /tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_7.png -------------------------------------------------------------------------------- /tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_8.png -------------------------------------------------------------------------------- /tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_8_a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_8_a.png -------------------------------------------------------------------------------- /tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_8_b.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_8_b.png -------------------------------------------------------------------------------- /tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_8_c.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_8_c.png -------------------------------------------------------------------------------- /tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_9.png -------------------------------------------------------------------------------- /tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_9_a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/pattern_6_Databricks_Table_Access_Control/media/pattern_6_step_9_a.png -------------------------------------------------------------------------------- /tutorials/preparation/Preparation.md: -------------------------------------------------------------------------------- 1 | # Preparation Steps 2 | 3 | **Summary:** 4 | 5 | This document provides the preparation steps required to execute all tutorials. 6 | 7 | **Versions:** 8 | 9 | | **Name** | **Title** | **Notes** | **Date** | 10 | | --- | --- | --- | --- | 11 | | Anil Sener | Microsoft Cloud Solution Architect – Data & AI | Original | 01 December 2021 | 12 | | | | | | 13 | 14 | # Contents 15 | 16 | [Pre-requisites](#Pre-requisites) 17 | 18 | [Tutorial Steps](#Tutorial-Steps) 19 | 20 | [License/Terms of Use](#License/Terms-of-Use) 21 | 22 | ## Pre-requisites 23 | 24 | This tutorial requires the completion of the steps in [Connecting securely to ADLS from ADB](../../Readme.md#connecting-securely-to-adls-from-adb) section. 25 | 26 | ## Tutorial Steps 27 | 1. Navigate to Azure Active Directory > Users and then create two users as below: 28 |

29 | 30 |

31 | 32 | 2. Navigate to Azure Active Directory > Groups and then create two groups as below: 33 |

34 | 35 |

36 | 37 | 3. Click on group1, navigate to Members and add TestUser1: 38 |

39 | 40 |

41 | 42 | 4. Click on group2, navigate to Members and add TestUser2: 43 |

44 | 45 |

46 | 47 | 5. Navigate to Storage Accounts, drill down to the storage account created in the setup steps and navigate to Containers. Then, create a container called test-container with the following settings (a scripted alternative is sketched after the screenshots below). 48 |

49 | 50 |

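If you prefer to script this step rather than use the portal, a rough sketch with the azure-storage-file-datalake Python SDK follows. The account name below is the example one used in the preparation notebook, and the credential setup is an assumption; this is not part of the original tutorial:

```
# pip install azure-storage-file-datalake azure-identity
from azure.identity import DefaultAzureCredential
from azure.storage.filedatalake import DataLakeServiceClient

# Use the storage account created in the setup steps (rrdemostorageacc is the example name).
service_client = DataLakeServiceClient(
    account_url="https://rrdemostorageacc.dfs.core.windows.net",
    credential=DefaultAzureCredential(),
)

# Creates the ADLS Gen2 container (file system) used by all tutorials.
service_client.create_file_system(file_system="test-container")
```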
51 | 52 | 6. Navigate to the premium Azure Databricks Workspace created earlier, open the Access Control (IAM) > Role Assignments tab and then add both of the AD groups as Contributors: 53 |

54 | 55 |

56 | 57 | 7. Navigate to the premium Azure Databricks Workspace > Overview and click the Launch Workspace button, choosing an admin user to log in. When the Azure Databricks Workspace is displayed, navigate to Settings > Admin Console > Users. Then, add the two AD users created earlier using their emails. Please ensure that they are not Admin users and that they cannot create any cluster: 58 |

59 | 60 |

61 | 62 | 8. Navigate to Settings > Admin Console > Groups. Create two Databricks user groups called IoTDevicesGroup and LoanGroup (a scripted alternative is sketched after the screenshots below). 63 | Then, add the two AD users created earlier to these groups using their emails, as shown in the next two steps. Please ensure that they are not Admin users and that they cannot create any cluster: 64 |

65 | 66 |

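The same groups can also be created programmatically. Below is a hedged sketch against the legacy Databricks Groups API 2.0; the workspace URL is a placeholder and the token is assumed to be a personal access token of a workspace admin, so verify the endpoint against the current documentation before relying on it:

```
import os
import requests

host = "https://<your-workspace>.azuredatabricks.net"   # placeholder workspace URL
headers = {"Authorization": f"Bearer {os.environ['DATABRICKS_TOKEN']}"}

for group_name in ("IoTDevicesGroup", "LoanGroup"):
    # Create a workspace-local Databricks group.
    response = requests.post(f"{host}/api/2.0/groups/create",
                             headers=headers, json={"group_name": group_name})
    response.raise_for_status()
```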
67 | 68 | 9. Click the IoTDevicesGroup group and add testuser1 as a member: 69 |

70 | 71 |

72 | 73 | 10. Click the LoanGroup group and add testuser2 as a member: 74 |

75 | 76 |

77 | 78 | 11. Navigate to Settings > Admin Console > Workspace Settings. Please ensure that all the following access control settings are enabled: 79 |

80 | 81 |

82 | 83 | 12. Navigate to Compute and create a single node cluster with the following settings: 84 |

85 | 86 |

87 | 88 | 13. Navigate to Workspace, upload [Preparation.ipynb](notebooks/Preparation.ipynb) to the Databricks Workspace and open the notebook, then attach the cluster created in the previous step and start it. When the cluster is ready, run all the cells in the notebook: 89 |

90 | 91 |

92 | 93 | RESULT: This notebook will create the iot-devices and loans folders under test-container using the example databricks-datasets, which are essential for all patterns demonstrated in the tutorials. The last cell, which creates two Databricks tables, is a pre-condition only for pattern 6. 94 | 95 | 96 | ## License/Terms of Use 97 | 98 | This is a free white paper released into the public domain. 99 | 100 | Anyone is free to use or distribute this white paper, for any purpose, 101 | commercial or non-commercial, and by any means. 102 | 103 | THE WHITE PAPER IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, 104 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 105 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 106 | 107 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 108 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 109 | FROM, OUT OF OR IN CONNECTION WITH THE WHITE PAPER. 110 | 111 | -------------------------------------------------------------------------------- /tutorials/preparation/media/preparation_step_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/preparation/media/preparation_step_1.png -------------------------------------------------------------------------------- /tutorials/preparation/media/preparation_step_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/preparation/media/preparation_step_2.png -------------------------------------------------------------------------------- /tutorials/preparation/media/preparation_step_2_a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/preparation/media/preparation_step_2_a.png -------------------------------------------------------------------------------- /tutorials/preparation/media/preparation_step_2_b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/preparation/media/preparation_step_2_b.png -------------------------------------------------------------------------------- /tutorials/preparation/media/preparation_step_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/preparation/media/preparation_step_3.png -------------------------------------------------------------------------------- /tutorials/preparation/media/preparation_step_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/preparation/media/preparation_step_4.png -------------------------------------------------------------------------------- /tutorials/preparation/media/preparation_step_5.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/preparation/media/preparation_step_5.png -------------------------------------------------------------------------------- /tutorials/preparation/media/preparation_step_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/preparation/media/preparation_step_6.png -------------------------------------------------------------------------------- /tutorials/preparation/media/preparation_step_6_a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/preparation/media/preparation_step_6_a.png -------------------------------------------------------------------------------- /tutorials/preparation/media/preparation_step_6_b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/preparation/media/preparation_step_6_b.png -------------------------------------------------------------------------------- /tutorials/preparation/media/preparation_step_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/preparation/media/preparation_step_7.png -------------------------------------------------------------------------------- /tutorials/preparation/media/preparation_step_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/preparation/media/preparation_step_8.png -------------------------------------------------------------------------------- /tutorials/preparation/media/preparation_step_9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurtn/datalake-ADLS-access-patterns-with-Databricks/b990dfdbdcfa7044dad83bf2fa45f2d4b7a37fff/tutorials/preparation/media/preparation_step_9.png -------------------------------------------------------------------------------- /tutorials/preparation/notebooks/Preparation.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"cell_type":"markdown","source":["# Preparation"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"c2b51a9b-baf6-458d-b312-fb2f93e48839"}}},{"cell_type":"code","source":["%sh\n\nnslookup rrdemostorageacc.dfs.core.windows.net"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"2ea31503-bbe1-4d78-9e93-e0b72919491e"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"
Server:\t\t168.63.129.16\nAddress:\t168.63.129.16#53\n\nNon-authoritative answer:\nrrdemostorageacc.dfs.core.windows.net\tcanonical name = rrdemostorageacc.privatelink.dfs.core.windows.net.\nName:\trrdemostorageacc.privatelink.dfs.core.windows.net\nAddress: 10.1.0.4\n\n
","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"html","arguments":{}}},"output_type":"display_data","data":{"text/html":["\n
Server:\t\t168.63.129.16\nAddress:\t168.63.129.16#53\n\nNon-authoritative answer:\nrrdemostorageacc.dfs.core.windows.net\tcanonical name = rrdemostorageacc.privatelink.dfs.core.windows.net.\nName:\trrdemostorageacc.privatelink.dfs.core.windows.net\nAddress: 10.1.0.4\n\n
"]},"transient":null}],"execution_count":0},{"cell_type":"code","source":["tenant_id=\"8a7b0420-a9f1-4cb9-8567-8a5b02d42bd2\"\nscope=\"rr-demo-secret-scope\"\nsecret=\"rr-demo-secret-key\"\nservice_principal_id=\"15317507-66ff-48e8-89ac-fca37c38f38e\"\n\nmount_path=\"/mnt/test-mount\""],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"541b1372-ad41-4d8f-ace2-b0d1c30d56b8"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"
","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"html","arguments":{}}},"output_type":"display_data","data":{"text/html":["\n
"]},"transient":null}],"execution_count":0},{"cell_type":"code","source":["#https://docs.microsoft.com/en-gb/azure/databricks/data/data-sources/azure/adls-gen2/azure-datalake-gen2-sp-access\n\nconfigs = {\n \"fs.azure.account.auth.type\": \"OAuth\",\n \"fs.azure.account.oauth.provider.type\": \"org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider\",\n \"fs.azure.account.oauth2.client.id\": service_principal_id,\n \"fs.azure.account.oauth2.client.secret\": dbutils.secrets.get(scope = scope, key = secret),\n \"fs.azure.account.oauth2.client.endpoint\": \"https://login.microsoftonline.com/\"+tenant_id+\"/oauth2/token\"\n}\n\ndbutils.fs.unmount(\n mount_point = mount_path\n)\ndbutils.fs.mount(\n source = \"abfss://test-container@rrdemostorageacc.dfs.core.windows.net/\",\n mount_point = mount_path,\n extra_configs= configs\n)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"936ec12c-49f5-404a-8c29-1f671ac368ef"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"
/mnt/test-mount has been unmounted.\nOut[2]: True
","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"html","arguments":{}}},"output_type":"display_data","data":{"text/html":["\n
/mnt/test-mount has been unmounted.\nOut[2]: True
"]},"transient":null}],"execution_count":0},{"cell_type":"code","source":["\n\ndisplay(dbutils.fs.ls('/databricks-datasets'))"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"7fd2b642-41bb-40f9-8c18-9cfb53cf6746"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"overflow":false,"datasetInfos":[],"data":[["dbfs:/databricks-datasets/COVID/","COVID/",0],["dbfs:/databricks-datasets/README.md","README.md",976],["dbfs:/databricks-datasets/Rdatasets/","Rdatasets/",0],["dbfs:/databricks-datasets/SPARK_README.md","SPARK_README.md",3359],["dbfs:/databricks-datasets/adult/","adult/",0],["dbfs:/databricks-datasets/airlines/","airlines/",0],["dbfs:/databricks-datasets/amazon/","amazon/",0],["dbfs:/databricks-datasets/asa/","asa/",0],["dbfs:/databricks-datasets/atlas_higgs/","atlas_higgs/",0],["dbfs:/databricks-datasets/bikeSharing/","bikeSharing/",0],["dbfs:/databricks-datasets/cctvVideos/","cctvVideos/",0],["dbfs:/databricks-datasets/credit-card-fraud/","credit-card-fraud/",0],["dbfs:/databricks-datasets/cs100/","cs100/",0],["dbfs:/databricks-datasets/cs110x/","cs110x/",0],["dbfs:/databricks-datasets/cs190/","cs190/",0],["dbfs:/databricks-datasets/data.gov/","data.gov/",0],["dbfs:/databricks-datasets/definitive-guide/","definitive-guide/",0],["dbfs:/databricks-datasets/delta-sharing/","delta-sharing/",0],["dbfs:/databricks-datasets/flights/","flights/",0],["dbfs:/databricks-datasets/flower_photos/","flower_photos/",0],["dbfs:/databricks-datasets/flowers/","flowers/",0],["dbfs:/databricks-datasets/genomics/","genomics/",0],["dbfs:/databricks-datasets/hail/","hail/",0],["dbfs:/databricks-datasets/iot/","iot/",0],["dbfs:/databricks-datasets/iot-stream/","iot-stream/",0],["dbfs:/databricks-datasets/learning-spark/","learning-spark/",0],["dbfs:/databricks-datasets/learning-spark-v2/","learning-spark-v2/",0],["dbfs:/databricks-datasets/lending-club-loan-stats/","lending-club-loan-stats/",0],["dbfs:/databricks-datasets/med-images/","med-images/",0],["dbfs:/databricks-datasets/mnist-digits/","mnist-digits/",0],["dbfs:/databricks-datasets/news20.binary/","news20.binary/",0],["dbfs:/databricks-datasets/nyctaxi/","nyctaxi/",0],["dbfs:/databricks-datasets/nyctaxi-with-zipcodes/","nyctaxi-with-zipcodes/",0],["dbfs:/databricks-datasets/online_retail/","online_retail/",0],["dbfs:/databricks-datasets/overlap-join/","overlap-join/",0],["dbfs:/databricks-datasets/power-plant/","power-plant/",0],["dbfs:/databricks-datasets/retail-org/","retail-org/",0],["dbfs:/databricks-datasets/rwe/","rwe/",0],["dbfs:/databricks-datasets/sai-summit-2019-sf/","sai-summit-2019-sf/",0],["dbfs:/databricks-datasets/sample_logs/","sample_logs/",0],["dbfs:/databricks-datasets/samples/","samples/",0],["dbfs:/databricks-datasets/sfo_customer_survey/","sfo_customer_survey/",0],["dbfs:/databricks-datasets/sms_spam_collection/","sms_spam_collection/",0],["dbfs:/databricks-datasets/songs/","songs/",0],["dbfs:/databricks-datasets/structured-streaming/","structured-streaming/",0],["dbfs:/databricks-datasets/timeseries/","timeseries/",0],["dbfs:/databricks-datasets/tpch/","tpch/",0],["dbfs:/databricks-datasets/weather/","weather/",0],["dbfs:/databricks-datasets/wiki/","wiki/",0],["dbfs:/databricks-datasets/wikipedia-datasets/","wikipedia-datasets/",0],["dbfs:/databricks-datasets/wine-quality/","wine-quality/",0]],"plotOptions":{"displayType":"table","customPlotOptions":{},"pivotColumns":null,"pivotAggregation":null,"xColumns":null,"yColumns":null},"
columnCustomDisplayInfos":{},"aggType":"","isJsonSchema":true,"removedWidgets":[],"aggSchema":[],"schema":[{"name":"path","type":"\"string\"","metadata":"{}"},{"name":"name","type":"\"string\"","metadata":"{}"},{"name":"size","type":"\"long\"","metadata":"{}"}],"aggError":"","aggData":[],"addedWidgets":{},"metadata":{},"dbfsResultPath":null,"type":"table","aggOverflow":false,"aggSeriesLimitReached":false,"arguments":{}}},"output_type":"display_data","data":{"text/html":["
pathnamesize
dbfs:/databricks-datasets/COVID/COVID/0
dbfs:/databricks-datasets/README.mdREADME.md976
dbfs:/databricks-datasets/Rdatasets/Rdatasets/0
dbfs:/databricks-datasets/SPARK_README.mdSPARK_README.md3359
dbfs:/databricks-datasets/adult/adult/0
dbfs:/databricks-datasets/airlines/airlines/0
dbfs:/databricks-datasets/amazon/amazon/0
dbfs:/databricks-datasets/asa/asa/0
dbfs:/databricks-datasets/atlas_higgs/atlas_higgs/0
dbfs:/databricks-datasets/bikeSharing/bikeSharing/0
dbfs:/databricks-datasets/cctvVideos/cctvVideos/0
dbfs:/databricks-datasets/credit-card-fraud/credit-card-fraud/0
dbfs:/databricks-datasets/cs100/cs100/0
dbfs:/databricks-datasets/cs110x/cs110x/0
dbfs:/databricks-datasets/cs190/cs190/0
dbfs:/databricks-datasets/data.gov/data.gov/0
dbfs:/databricks-datasets/definitive-guide/definitive-guide/0
dbfs:/databricks-datasets/delta-sharing/delta-sharing/0
dbfs:/databricks-datasets/flights/flights/0
dbfs:/databricks-datasets/flower_photos/flower_photos/0
dbfs:/databricks-datasets/flowers/flowers/0
dbfs:/databricks-datasets/genomics/genomics/0
dbfs:/databricks-datasets/hail/hail/0
dbfs:/databricks-datasets/iot/iot/0
dbfs:/databricks-datasets/iot-stream/iot-stream/0
dbfs:/databricks-datasets/learning-spark/learning-spark/0
dbfs:/databricks-datasets/learning-spark-v2/learning-spark-v2/0
dbfs:/databricks-datasets/lending-club-loan-stats/lending-club-loan-stats/0
dbfs:/databricks-datasets/med-images/med-images/0
dbfs:/databricks-datasets/mnist-digits/mnist-digits/0
dbfs:/databricks-datasets/news20.binary/news20.binary/0
dbfs:/databricks-datasets/nyctaxi/nyctaxi/0
dbfs:/databricks-datasets/nyctaxi-with-zipcodes/nyctaxi-with-zipcodes/0
dbfs:/databricks-datasets/online_retail/online_retail/0
dbfs:/databricks-datasets/overlap-join/overlap-join/0
dbfs:/databricks-datasets/power-plant/power-plant/0
dbfs:/databricks-datasets/retail-org/retail-org/0
dbfs:/databricks-datasets/rwe/rwe/0
dbfs:/databricks-datasets/sai-summit-2019-sf/sai-summit-2019-sf/0
dbfs:/databricks-datasets/sample_logs/sample_logs/0
dbfs:/databricks-datasets/samples/samples/0
dbfs:/databricks-datasets/sfo_customer_survey/sfo_customer_survey/0
dbfs:/databricks-datasets/sms_spam_collection/sms_spam_collection/0
dbfs:/databricks-datasets/songs/songs/0
dbfs:/databricks-datasets/structured-streaming/structured-streaming/0
dbfs:/databricks-datasets/timeseries/timeseries/0
dbfs:/databricks-datasets/tpch/tpch/0
dbfs:/databricks-datasets/weather/weather/0
dbfs:/databricks-datasets/wiki/wiki/0
dbfs:/databricks-datasets/wikipedia-datasets/wikipedia-datasets/0
dbfs:/databricks-datasets/wine-quality/wine-quality/0
"]},"transient":null}],"execution_count":0},{"cell_type":"code","source":["read_format = 'json'\nwrite_format = 'delta'\nload_path = '/databricks-datasets/learning-spark-v2/iot-devices/iot_devices.json'\nsave_path = mount_path+'/iot-devices'\ntable_name = 'default.iot_devices'\n\n# Load the data from its source.\niot_devices = spark \\\n .read \\\n .format(read_format) \\\n .load(load_path)\n\n# Write the data to its target.\niot_devices.write \\\n .format(write_format) \\\n .mode(\"overwrite\") \\\n .save(save_path)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"52ddf5b2-9d5a-47b4-a910-8f38e4aac189"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"
","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"html","arguments":{}}},"output_type":"display_data","data":{"text/html":["\n
"]},"transient":null}],"execution_count":0},{"cell_type":"code","source":["\ndisplay(dbutils.fs.ls(mount_path))"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"61e734c3-7fb2-408d-bc8a-4d9280916dd3"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"overflow":false,"datasetInfos":[],"data":[["dbfs:/mnt/test-mount/loans/","loans/",0],["dbfs:/mnt/test-mount/people-10m/","people-10m/",0]],"plotOptions":{"displayType":"table","customPlotOptions":{},"pivotColumns":null,"pivotAggregation":null,"xColumns":null,"yColumns":null},"columnCustomDisplayInfos":{},"aggType":"","isJsonSchema":true,"removedWidgets":[],"aggSchema":[],"schema":[{"name":"path","type":"\"string\"","metadata":"{}"},{"name":"name","type":"\"string\"","metadata":"{}"},{"name":"size","type":"\"long\"","metadata":"{}"}],"aggError":"","aggData":[],"addedWidgets":{},"metadata":{},"dbfsResultPath":null,"type":"table","aggOverflow":false,"aggSeriesLimitReached":false,"arguments":{}}},"output_type":"display_data","data":{"text/html":["
pathnamesize
dbfs:/mnt/test-mount/loans/loans/0
dbfs:/mnt/test-mount/people-10m/people-10m/0
"]},"transient":null}],"execution_count":0},{"cell_type":"code","source":["read_format = 'parquet'\nwrite_format = 'delta'\nload_path = '/databricks-datasets/learning-spark-v2/loans/loan-risks.snappy.parquet'\nsave_path = mount_path+'/loans'\ntable_name = 'default.loans'\n\n# Load the data from its source.\nloans = spark \\\n .read \\\n .format(read_format) \\\n .load(load_path)\n\n# Write the data to its target.\nloans.write \\\n .format(write_format) \\\n .mode(\"overwrite\") \\\n .save(save_path)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"99733bc9-4779-4379-893a-ac59633d3e32"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"
","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"html","arguments":{}}},"output_type":"display_data","data":{"text/html":["\n
"]},"transient":null}],"execution_count":0},{"cell_type":"code","source":["%sql\n\nCREATE DATABASE testdb;\n\nCREATE TABLE testdb.iot_devices\n USING DELTA\n LOCATION '/mnt/test-mount/iot-devices';\n \nCREATE TABLE testdb.loans\n USING DELTA\n LOCATION '/mnt/test-mount/loans';"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"a46de3a8-088e-4c9a-ab70-cc0c334b0b5b"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"overflow":false,"datasetInfos":[],"data":[],"plotOptions":{"displayType":"table","customPlotOptions":{},"pivotColumns":null,"pivotAggregation":null,"xColumns":null,"yColumns":null},"columnCustomDisplayInfos":{},"aggType":"","isJsonSchema":true,"removedWidgets":[],"aggSchema":[],"schema":[],"aggError":"","aggData":[],"addedWidgets":{},"metadata":{},"dbfsResultPath":null,"type":"table","aggOverflow":false,"aggSeriesLimitReached":false,"arguments":{}}},"output_type":"display_data","data":{"text/html":["
"]},"transient":null}],"execution_count":0}],"metadata":{"application/vnd.databricks.v1+notebook":{"notebookName":"Preparation","dashboards":[],"notebookMetadata":{"pythonIndentUnit":4},"language":"python","widgets":{},"notebookOrigID":3151386918101723}},"nbformat":4,"nbformat_minor":0} 2 | --------------------------------------------------------------------------------