├── Setup ├── Template │ ├── .vs │ │ ├── ProjectSettings.json │ │ ├── slnx.sqlite │ │ ├── VSWorkspaceState.json │ │ ├── Template │ │ │ └── v15 │ │ │ │ └── .suo │ │ └── config │ │ │ └── applicationhost.config │ ├── parameters.json │ ├── deploy.sh │ ├── deployer.rb │ ├── deploy.ps1 │ ├── template.json │ └── DeploymentHelper.cs ├── Deployment │ └── HDInsightLabsEnvironment │ │ ├── .vs │ │ └── HDInsightLabsEnvironment │ │ │ └── v15 │ │ │ └── .suo │ │ ├── HDInsightLabsEnvironment │ │ ├── HDInsightLabsEnvironment.deployproj.user │ │ ├── bin │ │ │ └── Debug │ │ │ │ └── staging │ │ │ │ └── HDInsightLabsEnvironment │ │ │ │ ├── azuredeploy.parameters.json │ │ │ │ ├── azuredeploy.json │ │ │ │ └── Deploy-AzureResourceGroup.ps1 │ │ ├── azuredeploy.parameters.json │ │ ├── HDInsightLabsEnvironment.deployproj │ │ ├── Deployment.targets │ │ ├── azuredeploy.json │ │ └── Deploy-AzureResourceGroup.ps1 │ │ └── HDInsightLabsEnvironment.sln ├── Environment-Cleanup.md ├── Scripts │ ├── azuredeploy.all.parameters.json │ ├── GenerateCert.ps1 │ ├── Deploy-LabEnvironment.ps1 │ ├── Deploy-AzureResourceGroup.ps1 │ └── azuredeploy.all.json └── Environment-Setup.md ├── Labs ├── Lab01 │ ├── Lab01.dbc │ ├── Lab01-ADF.dbc │ ├── Lab01-complete.dbc │ ├── images │ │ ├── trigger.png │ │ ├── cluster-id.png │ │ ├── copy-token.png │ │ ├── account-menu.png │ │ ├── copy-pipeline.png │ │ ├── user-settings.png │ │ ├── adf-publish-all.png │ │ ├── notebook-widgets.png │ │ ├── pipeline-success.png │ │ ├── author-and-monitor.png │ │ ├── generate-new-token.png │ │ ├── notebook-activity.png │ │ ├── resource-group-adf.png │ │ ├── storage-container.png │ │ ├── monitor-in-progress.png │ │ ├── adf-pipeline-unconnected.png │ │ ├── databricks-activity-runs.png │ │ ├── databricks-cluster-attach.png │ │ ├── notebook-activity-connect.png │ │ ├── notebook-activity-general.png │ │ ├── notebook-activity-connections.png │ │ ├── notebook-activity-linked-service.png │ │ ├── notebook-activity-settings-empty.png │ │ └── notebook-activity-settings-populated.png │ └── Lab01.md ├── Lab02 │ ├── Lab02.dbc │ ├── Lab02-complete.dbc │ ├── images │ │ ├── get-data.png │ │ ├── piechart.png │ │ ├── directquery.png │ │ ├── llap-chart1.png │ │ ├── llap-chart2.png │ │ ├── llap-chart3.png │ │ ├── llap-chart4.png │ │ ├── plot-options.png │ │ ├── completed-report.png │ │ ├── piechart-fields.png │ │ ├── piechart-format.png │ │ ├── publish-button.png │ │ ├── published-report.png │ │ ├── visualizations.png │ │ ├── waterfallchart.png │ │ ├── get-data-hdinsight.png │ │ ├── clustered-columnchart.png │ │ ├── databricks-user-menu.png │ │ ├── get-data-directquery.png │ │ ├── manage-relationships.png │ │ ├── publishing-succeeded.png │ │ ├── stacked-columnchart.png │ │ ├── waterfallchart-fields.png │ │ ├── Piechart-visualization.png │ │ ├── power-bi-service-login.png │ │ ├── databricks-generate-token.png │ │ ├── databricks-user-settings.png │ │ ├── power-bi-completed-report.png │ │ ├── power-bi-get-data-dialog.png │ │ ├── power-bi-service-publish.png │ │ ├── power-bi-spark-connection.png │ │ ├── powerbi-edit-credentials.png │ │ ├── powerbi-enter-credentials.png │ │ ├── relationship-weblogs-users.png │ │ ├── stacked-columnchart-fields.png │ │ ├── databricks-cluster-jdbc-url.png │ │ ├── power-bi-service-spark-login.png │ │ ├── power-bi-relationships-manage.png │ │ ├── power-bi-spark-connection-login.png │ │ ├── relationship-weblogs-products.png │ │ ├── databricks-cluster-jdbc-url-parsed.png │ │ ├── power-bi-relationship-weblogs-users.png │ │ ├── power-bi-service-publish-succeeded.png │ │ 
├── power-bi-spark-connection-load-data.png │ │ ├── databricks-visualizations-group-by-age.png │ │ ├── databricks-visualizations-toolbar-bar.png │ │ ├── databricks-visualizations-toolbar-pie.png │ │ └── power-bi-relationship-weblogs-products.png │ └── Lab02.md ├── Lab03 │ ├── Lab03.dbc │ ├── Lab03-complete.dbc │ └── Lab03.md ├── Lab04 │ ├── Lab04.dbc │ ├── Lab04-complete.dbc │ ├── Create-ALS-Model.dbc │ ├── images │ │ ├── streaming-actions-dashboard.png │ │ └── static-actions-windowed-bar-chart.png │ └── Lab04.md └── images │ ├── plot-options.png │ └── visualizations.png └── README.md /Setup/Template/.vs/ProjectSettings.json: -------------------------------------------------------------------------------- 1 | { 2 | "CurrentProjectSetting": null 3 | } -------------------------------------------------------------------------------- /Labs/Lab01/Lab01.dbc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab01/Lab01.dbc -------------------------------------------------------------------------------- /Labs/Lab02/Lab02.dbc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/Lab02.dbc -------------------------------------------------------------------------------- /Labs/Lab03/Lab03.dbc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab03/Lab03.dbc -------------------------------------------------------------------------------- /Labs/Lab04/Lab04.dbc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab04/Lab04.dbc -------------------------------------------------------------------------------- /Labs/Lab01/Lab01-ADF.dbc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab01/Lab01-ADF.dbc -------------------------------------------------------------------------------- /Labs/images/plot-options.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/images/plot-options.png -------------------------------------------------------------------------------- /Labs/Lab01/Lab01-complete.dbc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab01/Lab01-complete.dbc -------------------------------------------------------------------------------- /Labs/Lab01/images/trigger.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab01/images/trigger.png -------------------------------------------------------------------------------- /Labs/Lab02/Lab02-complete.dbc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/Lab02-complete.dbc -------------------------------------------------------------------------------- /Labs/Lab02/images/get-data.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/get-data.png -------------------------------------------------------------------------------- /Labs/Lab02/images/piechart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/piechart.png -------------------------------------------------------------------------------- /Labs/Lab03/Lab03-complete.dbc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab03/Lab03-complete.dbc -------------------------------------------------------------------------------- /Labs/Lab04/Lab04-complete.dbc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab04/Lab04-complete.dbc -------------------------------------------------------------------------------- /Labs/images/visualizations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/images/visualizations.png -------------------------------------------------------------------------------- /Setup/Template/.vs/slnx.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Setup/Template/.vs/slnx.sqlite -------------------------------------------------------------------------------- /Labs/Lab01/images/cluster-id.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab01/images/cluster-id.png -------------------------------------------------------------------------------- /Labs/Lab01/images/copy-token.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab01/images/copy-token.png -------------------------------------------------------------------------------- /Labs/Lab02/images/directquery.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/directquery.png -------------------------------------------------------------------------------- /Labs/Lab02/images/llap-chart1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/llap-chart1.png -------------------------------------------------------------------------------- /Labs/Lab02/images/llap-chart2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/llap-chart2.png -------------------------------------------------------------------------------- /Labs/Lab02/images/llap-chart3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/llap-chart3.png -------------------------------------------------------------------------------- /Labs/Lab02/images/llap-chart4.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/llap-chart4.png -------------------------------------------------------------------------------- /Labs/Lab04/Create-ALS-Model.dbc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab04/Create-ALS-Model.dbc -------------------------------------------------------------------------------- /Labs/Lab01/images/account-menu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab01/images/account-menu.png -------------------------------------------------------------------------------- /Labs/Lab01/images/copy-pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab01/images/copy-pipeline.png -------------------------------------------------------------------------------- /Labs/Lab01/images/user-settings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab01/images/user-settings.png -------------------------------------------------------------------------------- /Labs/Lab02/images/plot-options.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/plot-options.png -------------------------------------------------------------------------------- /Setup/Template/.vs/VSWorkspaceState.json: -------------------------------------------------------------------------------- 1 | { 2 | "ExpandedNodes": [ 3 | "" 4 | ], 5 | "PreviewInSolutionExplorer": false 6 | } -------------------------------------------------------------------------------- /Labs/Lab01/images/adf-publish-all.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab01/images/adf-publish-all.png -------------------------------------------------------------------------------- /Labs/Lab01/images/notebook-widgets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab01/images/notebook-widgets.png -------------------------------------------------------------------------------- /Labs/Lab01/images/pipeline-success.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab01/images/pipeline-success.png -------------------------------------------------------------------------------- /Labs/Lab02/images/completed-report.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/completed-report.png -------------------------------------------------------------------------------- /Labs/Lab02/images/piechart-fields.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/piechart-fields.png -------------------------------------------------------------------------------- /Labs/Lab02/images/piechart-format.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/piechart-format.png -------------------------------------------------------------------------------- /Labs/Lab02/images/publish-button.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/publish-button.png -------------------------------------------------------------------------------- /Labs/Lab02/images/published-report.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/published-report.png -------------------------------------------------------------------------------- /Labs/Lab02/images/visualizations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/visualizations.png -------------------------------------------------------------------------------- /Labs/Lab02/images/waterfallchart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/waterfallchart.png -------------------------------------------------------------------------------- /Setup/Template/.vs/Template/v15/.suo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Setup/Template/.vs/Template/v15/.suo -------------------------------------------------------------------------------- /Labs/Lab01/images/author-and-monitor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab01/images/author-and-monitor.png -------------------------------------------------------------------------------- /Labs/Lab01/images/generate-new-token.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab01/images/generate-new-token.png -------------------------------------------------------------------------------- /Labs/Lab01/images/notebook-activity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab01/images/notebook-activity.png -------------------------------------------------------------------------------- /Labs/Lab01/images/resource-group-adf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab01/images/resource-group-adf.png -------------------------------------------------------------------------------- /Labs/Lab01/images/storage-container.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab01/images/storage-container.png -------------------------------------------------------------------------------- /Labs/Lab02/images/get-data-hdinsight.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/get-data-hdinsight.png -------------------------------------------------------------------------------- /Labs/Lab01/images/monitor-in-progress.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab01/images/monitor-in-progress.png -------------------------------------------------------------------------------- /Labs/Lab02/images/clustered-columnchart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/clustered-columnchart.png -------------------------------------------------------------------------------- /Labs/Lab02/images/databricks-user-menu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/databricks-user-menu.png -------------------------------------------------------------------------------- /Labs/Lab02/images/get-data-directquery.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/get-data-directquery.png -------------------------------------------------------------------------------- /Labs/Lab02/images/manage-relationships.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/manage-relationships.png -------------------------------------------------------------------------------- /Labs/Lab02/images/publishing-succeeded.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/publishing-succeeded.png -------------------------------------------------------------------------------- /Labs/Lab02/images/stacked-columnchart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/stacked-columnchart.png -------------------------------------------------------------------------------- /Labs/Lab02/images/waterfallchart-fields.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/waterfallchart-fields.png -------------------------------------------------------------------------------- /Labs/Lab02/images/Piechart-visualization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/Piechart-visualization.png -------------------------------------------------------------------------------- /Labs/Lab02/images/power-bi-service-login.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/power-bi-service-login.png -------------------------------------------------------------------------------- /Labs/Lab01/images/adf-pipeline-unconnected.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab01/images/adf-pipeline-unconnected.png -------------------------------------------------------------------------------- /Labs/Lab01/images/databricks-activity-runs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab01/images/databricks-activity-runs.png -------------------------------------------------------------------------------- /Labs/Lab01/images/databricks-cluster-attach.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab01/images/databricks-cluster-attach.png -------------------------------------------------------------------------------- /Labs/Lab01/images/notebook-activity-connect.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab01/images/notebook-activity-connect.png -------------------------------------------------------------------------------- /Labs/Lab01/images/notebook-activity-general.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab01/images/notebook-activity-general.png -------------------------------------------------------------------------------- /Labs/Lab02/images/databricks-generate-token.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/databricks-generate-token.png -------------------------------------------------------------------------------- /Labs/Lab02/images/databricks-user-settings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/databricks-user-settings.png -------------------------------------------------------------------------------- /Labs/Lab02/images/power-bi-completed-report.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/power-bi-completed-report.png -------------------------------------------------------------------------------- /Labs/Lab02/images/power-bi-get-data-dialog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/power-bi-get-data-dialog.png -------------------------------------------------------------------------------- /Labs/Lab02/images/power-bi-service-publish.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/power-bi-service-publish.png -------------------------------------------------------------------------------- /Labs/Lab02/images/power-bi-spark-connection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/power-bi-spark-connection.png -------------------------------------------------------------------------------- /Labs/Lab02/images/powerbi-edit-credentials.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/powerbi-edit-credentials.png -------------------------------------------------------------------------------- /Labs/Lab02/images/powerbi-enter-credentials.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/powerbi-enter-credentials.png -------------------------------------------------------------------------------- /Labs/Lab02/images/relationship-weblogs-users.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/relationship-weblogs-users.png -------------------------------------------------------------------------------- /Labs/Lab02/images/stacked-columnchart-fields.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/stacked-columnchart-fields.png -------------------------------------------------------------------------------- /Labs/Lab02/images/databricks-cluster-jdbc-url.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/databricks-cluster-jdbc-url.png -------------------------------------------------------------------------------- /Labs/Lab02/images/power-bi-service-spark-login.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/power-bi-service-spark-login.png -------------------------------------------------------------------------------- /Labs/Lab04/images/streaming-actions-dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab04/images/streaming-actions-dashboard.png -------------------------------------------------------------------------------- /Labs/Lab01/images/notebook-activity-connections.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab01/images/notebook-activity-connections.png -------------------------------------------------------------------------------- /Labs/Lab02/images/power-bi-relationships-manage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/power-bi-relationships-manage.png -------------------------------------------------------------------------------- /Labs/Lab02/images/power-bi-spark-connection-login.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/power-bi-spark-connection-login.png -------------------------------------------------------------------------------- /Labs/Lab02/images/relationship-weblogs-products.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/relationship-weblogs-products.png 
-------------------------------------------------------------------------------- /Labs/Lab01/images/notebook-activity-linked-service.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab01/images/notebook-activity-linked-service.png -------------------------------------------------------------------------------- /Labs/Lab01/images/notebook-activity-settings-empty.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab01/images/notebook-activity-settings-empty.png -------------------------------------------------------------------------------- /Labs/Lab04/images/static-actions-windowed-bar-chart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab04/images/static-actions-windowed-bar-chart.png -------------------------------------------------------------------------------- /Labs/Lab01/images/notebook-activity-settings-populated.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab01/images/notebook-activity-settings-populated.png -------------------------------------------------------------------------------- /Labs/Lab02/images/databricks-cluster-jdbc-url-parsed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/databricks-cluster-jdbc-url-parsed.png -------------------------------------------------------------------------------- /Labs/Lab02/images/power-bi-relationship-weblogs-users.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/power-bi-relationship-weblogs-users.png -------------------------------------------------------------------------------- /Labs/Lab02/images/power-bi-service-publish-succeeded.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/power-bi-service-publish-succeeded.png -------------------------------------------------------------------------------- /Labs/Lab02/images/power-bi-spark-connection-load-data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/power-bi-spark-connection-load-data.png -------------------------------------------------------------------------------- /Labs/Lab02/images/databricks-visualizations-group-by-age.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/databricks-visualizations-group-by-age.png -------------------------------------------------------------------------------- /Labs/Lab02/images/databricks-visualizations-toolbar-bar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/databricks-visualizations-toolbar-bar.png -------------------------------------------------------------------------------- 
/Labs/Lab02/images/databricks-visualizations-toolbar-pie.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/databricks-visualizations-toolbar-pie.png -------------------------------------------------------------------------------- /Labs/Lab02/images/power-bi-relationship-weblogs-products.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Labs/Lab02/images/power-bi-relationship-weblogs-products.png -------------------------------------------------------------------------------- /Setup/Deployment/HDInsightLabsEnvironment/.vs/HDInsightLabsEnvironment/v15/.suo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nthacker/Databricks-Labs/master/Setup/Deployment/HDInsightLabsEnvironment/.vs/HDInsightLabsEnvironment/v15/.suo -------------------------------------------------------------------------------- /Setup/Deployment/HDInsightLabsEnvironment/HDInsightLabsEnvironment/HDInsightLabsEnvironment.deployproj.user: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | true 5 | 6 | -------------------------------------------------------------------------------- /Setup/Environment-Cleanup.md: -------------------------------------------------------------------------------- 1 | # Environment Cleanup 2 | 3 | This article describes the steps required to clean up your Azure subscription and avoid unnecessary costs. Perform these steps only when you are finished with the labs. 4 | 5 | 1. Log in to the Azure Portal. 6 | 2. Navigate to the resource group you deployed. 7 | 3. Select Delete resource group. 8 | 4. Type the name of the resource group and select Delete. 9 | 5. In a few minutes, all of the resources you used in these labs will be deleted.
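
If you prefer to script the cleanup instead of using the portal, the resource group can be removed with the same AzureRm cmdlets the deployment scripts in this repository already use. A minimal sketch; the resource group name below is a placeholder for the one you deployed:

```powershell
# Sign in, then delete the lab resource group and everything in it (this is irreversible).
Login-AzureRmAccount
Remove-AzureRmResourceGroup -Name "<your-resource-group-name>" -Force
```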
-------------------------------------------------------------------------------- /Setup/Scripts/azuredeploy.all.parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": 3 | "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 4 | "contentVersion": "1.0.0.0", 5 | "parameters": { 6 | "workspaceName": { 7 | "value": "databrickslab20180501" 8 | }, 9 | "pricingTier": { 10 | "value": "premium" 11 | }, 12 | "location": { 13 | "value": "eastus" 14 | }, 15 | "workspaceCount": { 16 | "value": 1 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /Setup/Deployment/HDInsightLabsEnvironment/HDInsightLabsEnvironment/bin/Debug/staging/HDInsightLabsEnvironment/azuredeploy.parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "clusterName": { 6 | "value": "hdilabs20170914ignite01" 7 | }, 8 | "clusterLoginUserName": { 9 | "value": "admin" 10 | }, 11 | "clusterLoginPassword": { 12 | "value": "Abc!1234567890" 13 | }, 14 | "location": { 15 | "value": "westus2" 16 | }, 17 | "clusterWorkerNodeCount": { 18 | "value": 2 19 | }, 20 | "clusterKind": { 21 | "value": "SPARK" 22 | }, 23 | "clusterVersion": { 24 | "value": "3.6" 25 | }, 26 | "sshUserName": { 27 | "value": "sshuser" 28 | }, 29 | "sshPassword": { 30 | "value": "Abc!1234567890" 31 | } 32 | } 33 | } -------------------------------------------------------------------------------- /Setup/Deployment/HDInsightLabsEnvironment/HDInsightLabsEnvironment/azuredeploy.parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "clusterName": { 6 | "value": "hdilabs20170914ignite01" 7 | }, 8 | "clusterLoginUserName": { 9 | "value": "admin" 10 | }, 11 | "clusterLoginPassword": { 12 | "value": "Abc!1234567890" 13 | }, 14 | "location": { 15 | "value": "westus2" 16 | }, 17 | "clusterWorkerNodeCount": { 18 | "value": 2 19 | }, 20 | "clusterKind": { 21 | "value": "SPARK" 22 | }, 23 | "clusterVersion": { 24 | "value": "3.6" 25 | }, 26 | "sshUserName": { 27 | "value": "sshuser" 28 | }, 29 | "sshPassword": { 30 | "value": "Abc!1234567890" 31 | }, 32 | "clusterCount": { 33 | "value": 1 34 | } 35 | } 36 | } -------------------------------------------------------------------------------- /Setup/Template/parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "clusterName": { 6 | "value": "hdi-labs-2017-09-14-ignite-01" 7 | }, 8 | "clusterLoginUserName": { 9 | "value": "admin" 10 | }, 11 | "clusterLoginPassword": { 12 | "value": "Abc!1234567890" 13 | }, 14 | "location": { 15 | "value": "westus2" 16 | }, 17 | "clusterWorkerNodeCount": { 18 | "value": 2 19 | }, 20 | "clusterKind": { 21 | "value": "SPARK" 22 | }, 23 | "clusterVersion": { 24 | "value": "3.6" 25 | }, 26 | "sshUserName": { 27 | "value": "sshuser" 28 | }, 29 | "sshPassword": { 30 | "value": "Abc!1234567890" 31 | } 32 | } 33 | } 
-------------------------------------------------------------------------------- /Setup/Deployment/HDInsightLabsEnvironment/HDInsightLabsEnvironment.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 15 4 | VisualStudioVersion = 15.0.26430.6 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{151D2E53-A2C4-4D7D-83FE-D05416EBD58E}") = "HDInsightLabsEnvironment", "HDInsightLabsEnvironment\HDInsightLabsEnvironment.deployproj", "{7F630C56-4045-4151-9CEF-51B2CB4BF04B}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Any CPU = Debug|Any CPU 11 | Release|Any CPU = Release|Any CPU 12 | EndGlobalSection 13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | {7F630C56-4045-4151-9CEF-51B2CB4BF04B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 15 | {7F630C56-4045-4151-9CEF-51B2CB4BF04B}.Debug|Any CPU.Build.0 = Debug|Any CPU 16 | {7F630C56-4045-4151-9CEF-51B2CB4BF04B}.Release|Any CPU.ActiveCfg = Release|Any CPU 17 | {7F630C56-4045-4151-9CEF-51B2CB4BF04B}.Release|Any CPU.Build.0 = Release|Any CPU 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = preSolution 20 | HideSolutionNode = FALSE 21 | EndGlobalSection 22 | EndGlobal 23 | -------------------------------------------------------------------------------- /Labs/Lab01/Lab01.md: -------------------------------------------------------------------------------- 1 | # Lab 01 - Batch & ETL processing of Big Data with Spark SQL 2 | 3 | To complete this lab, follow these steps: 4 | 5 | 1. Download a ZIP copy of this repository to your local machine and uncompress the contents. 6 | 2. Using the Azure Portal, navigate to your Azure Databricks workspace (created by the ARM template in the setup steps). 7 | 3. Create a Databricks cluster in your workspace (follow the instructions within the [Environment-Setup](../../Setup/Environment-Setup.md#2-create-and-run-a-new-azure-databricks-cluster) document). 8 | 4. Select Workspace, Users, your user account, and then select Import. 9 | 5. On the Import Notebooks dialog, select browse, navigate to the location where you unzipped this repository, and select Lab01.dbc in the Lab01 folder. 10 | 6. Select Import. 11 | 7. Repeat the process for Lab01-ADF.dbc. 12 | 8. Repeat the process for Lab01-complete.dbc. 13 | 9. In the listing, select Lab01.dbc to open the notebook. 14 | 10. Follow the prompts within the notebook to complete the lab. 15 | 16 | **HINT: If you get stuck with the lab, take a look at the provided solution notebook called Lab01-complete.dbc.**
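
If you would rather script the notebook import (steps 4-8 above) than use the workspace UI, the Databricks CLI can upload .dbc archives. A minimal sketch, assuming the CLI is installed and configured with a personal access token via `databricks configure --token`; the workspace paths are placeholders, and the exact flags should be verified against your CLI version:

```powershell
# Import the lab notebook archives into your user folder in the workspace.
databricks workspace import --format DBC Lab01.dbc "/Users/<your-user>/Lab01"
databricks workspace import --format DBC Lab01-ADF.dbc "/Users/<your-user>/Lab01-ADF"
databricks workspace import --format DBC Lab01-complete.dbc "/Users/<your-user>/Lab01-complete"
```

The same pattern works for the .dbc files in the other lab folders.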
-------------------------------------------------------------------------------- /Labs/Lab03/Lab03.md: -------------------------------------------------------------------------------- 1 | # Lab 03 - Data Science using Spark 2 | 3 | To complete this lab, follow these steps: 4 | 5 | 1. Download a ZIP copy of this repository to your local machine and uncompress the contents. 6 | 2. Using the Azure portal, navigate to your Azure Databricks workspace (created by the ARM template in the setup steps). 7 | 3. Create a Databricks cluster in your workspace (follow the instructions within the [Environment-Setup](../../Setup/Environment-Setup.md#2-create-and-run-a-new-azure-databricks-cluster) document). 8 | 4. Select Workspace, Users, your user account, and then select Import. 9 | 5. On the Import Notebooks dialog, select browse, navigate to the location where you unzipped this repository, and select Lab03.dbc in the Lab03 folder. 10 | 6. Select Import. 11 | 7. Repeat the process for Lab03-complete.dbc. 12 | 8. If you skipped Lab 1, repeat the process for Lab01-ADF.dbc in the Lab01 folder. 13 | 9. In the listing, select Lab03.dbc to open the notebook. 14 | 10. Follow the prompts within the notebook to complete the lab. 15 | 16 | **HINT: If you get stuck with the lab, take a look at the provided solution notebook called Lab03-complete.dbc.** 17 | -------------------------------------------------------------------------------- /Labs/Lab02/Lab02.md: -------------------------------------------------------------------------------- 1 | # Lab 02 - Data Warehouse / Interactive Pattern - Interactive Querying with Spark and Power BI 2 | 3 | To complete this lab, follow these steps: 4 | 5 | 1. Download a ZIP copy of this repository to your local machine and uncompress the contents. 6 | 2. Using the Azure Portal, navigate to your Azure Databricks workspace (created by the ARM template in the setup steps). 7 | 3. Create a Databricks cluster in your workspace (follow the instructions within the [Environment-Setup](../../Setup/Environment-Setup.md#2-create-and-run-a-new-azure-databricks-cluster) document). 8 | 4. Select Workspace, Users, your user account, and then select Import. 9 | 5. On the Import Notebooks dialog, select browse, navigate to the location where you unzipped this repository, and select Lab02.dbc in the Lab02 folder. 10 | 6. Select Import. 11 | 7. Repeat the process for Lab02-complete.dbc. 12 | 8. If you skipped Lab 1, repeat the process for Lab01-ADF.dbc in the Lab01 folder. 13 | 9. In the listing, select Lab02.dbc to open the notebook. 14 | 10. Follow the prompts within the notebook to complete the lab. 15 | 16 | **HINT: If you get stuck with the lab, take a look at the provided solution notebook called Lab02-complete.dbc.** 17 |
-------------------------------------------------------------------------------- /Setup/Scripts/GenerateCert.ps1: -------------------------------------------------------------------------------- 1 | $certFile = "C:\Temp\SecureLdapCert.pfx" # this must be an existing folder 2 | $base64certFile = "C:\Temp\SecureLdapCertString.txt" 3 | $certPassword = "Pass@word123" 4 | $domainName = "contoso.com" 5 | 6 | $certName = "*." + $domainName # this must match the Azure AD DNS name 7 | 8 | $lifetime=Get-Date 9 | $cert = New-SelfSignedCertificate ` 10 | -Subject $certName ` 11 | -NotAfter $lifetime.AddDays(365) ` 12 | -KeyUsage DigitalSignature, KeyEncipherment ` 13 | -Type SSLServerAuthentication ` 14 | -DnsName $certName -CertStoreLocation cert:\LocalMachine\My # store the cert where the lookup below expects it 15 | 16 | $certThumbprint = $cert.Thumbprint 17 | $cert = (Get-ChildItem -Path cert:\LocalMachine\My\$certThumbprint) 18 | 19 | $certPasswordSecureString = ConvertTo-SecureString $certPassword -AsPlainText -Force 20 | Export-PfxCertificate -Cert $cert -FilePath $certFile -Password $certPasswordSecureString 21 | 22 | $cert=New-Object System.Security.Cryptography.X509Certificates.X509Certificate2($certFile, $certPassword, [System.Security.Cryptography.X509Certificates.X509KeyStorageFlags] "Exportable, MachineKeySet, PersistKeySet") 23 | $base64cert = [Convert]::ToBase64String($cert.Export([System.Security.Cryptography.X509Certificates.X509ContentType]::Pfx, $certPassword)); 24 | $base64cert | Out-File -FilePath $base64certFile -Force 25 | Get-Content $base64certFile -------------------------------------------------------------------------------- /Labs/Lab04/Lab04.md: -------------------------------------------------------------------------------- 1 | # Lab 04 - Streaming Pattern - Processing events from Kafka using Spark and MLlib 2 | 3 | To complete this lab, follow these steps: 4 | 5 | 1. Download a ZIP copy of this repository to your local machine and uncompress the contents. 6 | 2. Using the Azure portal, navigate to your Azure Databricks workspace (created by the ARM template in the setup steps). 7 | 3. Create a Databricks cluster in your workspace (follow the instructions within the [Environment-Setup](../../Setup/Environment-Setup.md#2-create-and-run-a-new-azure-databricks-cluster) document). 8 | 4. Select Workspace, Users, your user account, and then select Import. 9 | 5. On the Import Notebooks dialog, select browse, navigate to the location where you unzipped this repository, and select Lab04.dbc in the Lab04 folder. 10 | 6. Select Import. 11 | 7. Repeat the process for Lab04-complete.dbc. 12 | 8. If you skipped Lab 1, repeat the process for Lab01-ADF.dbc in the Lab01 folder. 13 | 9. If you skipped Lab 3, repeat the process for Create-ALS-Model.dbc in the Lab04 folder. 14 | 10. In the listing, select Lab04.dbc to open the notebook. 15 | 11. Follow the prompts within the notebook to complete the lab.
16 | 17 | **HINT: If you get stuck with the lab, take a look at the provided solution notebook called Lab04-complete.dbc.** 18 | -------------------------------------------------------------------------------- /Setup/Deployment/HDInsightLabsEnvironment/HDInsightLabsEnvironment/HDInsightLabsEnvironment.deployproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Debug 6 | AnyCPU 7 | 8 | 9 | Release 10 | AnyCPU 11 | 12 | 13 | 14 | 7f630c56-4045-4151-9cef-51b2cb4bf04b 15 | 16 | 17 | Deployment 18 | 1.0 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | False 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /Setup/Template/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euo pipefail 3 | IFS=$'\n\t' 4 | 5 | # -e: immediately exit if any command has a non-zero exit status 6 | # -o: prevents errors in a pipeline from being masked 7 | # IFS new value is less likely to cause confusing bugs when looping arrays or arguments (e.g. $@) 8 | 9 | usage() { echo "Usage: $0 -i <subscriptionId> -g <resourceGroupName> -n <deploymentName> -l <resourceGroupLocation>" 1>&2; exit 1; } 10 | 11 | declare subscriptionId="" 12 | declare resourceGroupName="" 13 | declare deploymentName="" 14 | declare resourceGroupLocation="" 15 | 16 | # Initialize parameters specified from command line 17 | while getopts ":i:g:n:l:" arg; do 18 | case "${arg}" in 19 | i) 20 | subscriptionId=${OPTARG} 21 | ;; 22 | g) 23 | resourceGroupName=${OPTARG} 24 | ;; 25 | n) 26 | deploymentName=${OPTARG} 27 | ;; 28 | l) 29 | resourceGroupLocation=${OPTARG} 30 | ;; 31 | esac 32 | done 33 | shift $((OPTIND-1)) 34 | 35 | #Prompt for parameters if some required parameters are missing 36 | if [[ -z "$subscriptionId" ]]; then 37 | echo "Subscription Id:" 38 | read subscriptionId 39 | [[ "${subscriptionId:?}" ]] 40 | fi 41 | 42 | if [[ -z "$resourceGroupName" ]]; then 43 | echo "ResourceGroupName:" 44 | read resourceGroupName 45 | [[ "${resourceGroupName:?}" ]] 46 | fi 47 | 48 | if [[ -z "$deploymentName" ]]; then 49 | echo "DeploymentName:" 50 | read deploymentName 51 | fi 52 | 53 | if [[ -z "$resourceGroupLocation" ]]; then 54 | echo "Enter a location below to create a new resource group else skip this" 55 | echo "ResourceGroupLocation:" 56 | read resourceGroupLocation 57 | fi 58 | 59 | #templateFile Path - template file to be used 60 | templateFilePath="template.json" 61 | 62 | if [ ! -f "$templateFilePath" ]; then 63 | echo "$templateFilePath not found" 64 | exit 1 65 | fi 66 | 67 | #parameter file path 68 | parametersFilePath="parameters.json" 69 | 70 | if [ ! -f "$parametersFilePath" ]; then 71 | echo "$parametersFilePath not found" 72 | exit 1 73 | fi 74 | 75 | if [ -z "$subscriptionId" ] || [ -z "$resourceGroupName" ] || [ -z "$deploymentName" ]; then 76 | echo "Either one of subscriptionId, resourceGroupName, deploymentName is empty" 77 | usage 78 | fi 79 | 80 | #login to azure using your credentials 81 | # (the if guard keeps 'set -e' from aborting the script when the account probe fails) 82 | if ! az account show 1> /dev/null 2>&1; 83 | then 84 | az login 85 | fi 86 | 87 | 88 | #set the default subscription id 89 | az account set --subscription $subscriptionId 90 | 91 | set +e 92 | 93 | #Check for existing RG 94 | az group show --name $resourceGroupName 1> /dev/null 95 | 96 | if [ $? != 0 ]; then 97 | echo "Resource group with name" $resourceGroupName "could not be found. Creating new resource group.."
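# Re-enable exit-on-error for the create/deploy steps ('set +e' above let the existence probe fail without aborting).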
98 | set -e 99 | ( 100 | set -x 101 | az group create --name $resourceGroupName --location $resourceGroupLocation 1> /dev/null 102 | ) 103 | else 104 | echo "Using existing resource group..." 105 | fi 106 | 107 | #Start deployment 108 | echo "Starting deployment..." 109 | ( 110 | set -x 111 | az group deployment create --name $deploymentName --resource-group $resourceGroupName --template-file $templateFilePath --parameters $parametersFilePath 112 | ) 113 | 114 | if [ $? == 0 ]; 115 | then 116 | echo "Template has been successfully deployed" 117 | fi 118 | -------------------------------------------------------------------------------- /Setup/Template/deployer.rb: -------------------------------------------------------------------------------- 1 | require 'azure_mgmt_resources' 2 | 3 | class Deployer 4 | 5 | # Initialize the deployer class with subscription, resource group and resource group location. The class will raise an 6 | # ArgumentError if there are empty values for Tenant Id, Client Id or Client Secret environment variables. 7 | # 8 | # @param [String] subscription_id the subscription to deploy the template 9 | # @param [String] resource_group the resource group to create or update and then deploy the template 10 | # @param [String] resource_group_location the location of the resource group 11 | def initialize(subscription_id, resource_group, resource_group_location) 12 | raise ArgumentError.new("Missing template file 'template.json' in current directory.") unless File.exist?('template.json') 13 | raise ArgumentError.new("Missing parameters file 'parameters.json' in current directory.") unless File.exist?('parameters.json') 14 | @resource_group = resource_group 15 | @subscription_id = subscription_id 16 | @resource_group_location = resource_group_location 17 | provider = MsRestAzure::ApplicationTokenProvider.new( 18 | ENV['AZURE_TENANT_ID'], 19 | ENV['AZURE_CLIENT_ID'], 20 | ENV['AZURE_CLIENT_SECRET']) 21 | credentials = MsRest::TokenCredentials.new(provider) 22 | @client = Azure::ARM::Resources::ResourceManagementClient.new(credentials) 23 | @client.subscription_id = @subscription_id 24 | end 25 | 26 | # Deploy the template to a resource group 27 | def deploy 28 | # ensure the resource group is created 29 | params = Azure::ARM::Resources::Models::ResourceGroup.new.tap do |rg| 30 | rg.location = @resource_group_location 31 | end 32 | @client.resource_groups.create_or_update(@resource_group, params).value! 33 | 34 | # build the deployment from a json file template from parameters 35 | template = File.read(File.expand_path(File.join(__dir__, 'template.json'))) 36 | deployment = Azure::ARM::Resources::Models::Deployment.new 37 | deployment.properties = Azure::ARM::Resources::Models::DeploymentProperties.new 38 | deployment.properties.template = JSON.parse(template) 39 | deployment.properties.mode = Azure::ARM::Resources::Models::DeploymentMode::Incremental 40 | 41 | # build the deployment template parameters from Hash to {key: {value: value}} format 42 | deploy_params = File.read(File.expand_path(File.join(__dir__, 'parameters.json'))) 43 | deployment.properties.parameters = JSON.parse(deploy_params)["parameters"] 44 | 45 | # put the deployment to the resource group 46 | @client.deployments.create_or_update(@resource_group, 'azure-sample', deployment) 47 | end 48 | end 49 | 50 | # Get user inputs and execute the script 51 | if(ARGV.empty?) 
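# Example invocation (placeholder values): ruby deployer.rb 00000000-0000-0000-0000-000000000000 my-resource-group eastus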
52 | puts "Please specify subscriptionId resourceGroupName resourceGroupLocation as command line arguments" 53 | exit 54 | end 55 | 56 | subscription_id = ARGV[0] # Azure Subscription Id 57 | resource_group = ARGV[1] # The resource group for deployment 58 | resource_group_location = ARGV[2] # The resource group location 59 | 60 | msg = "\nInitializing the Deployer class with subscription id: #{subscription_id}, resource group: #{resource_group}" 61 | msg += "\nand resource group location: #{resource_group_location}...\n\n" 62 | puts msg 63 | 64 | # Initialize the deployer class 65 | deployer = Deployer.new(subscription_id, resource_group, resource_group_location) 66 | 67 | puts "Beginning the deployment... \n\n" 68 | # Deploy the template 69 | deployment = deployer.deploy 70 | 71 | puts "Done deploying!!" -------------------------------------------------------------------------------- /Setup/Template/deploy.ps1: -------------------------------------------------------------------------------- 1 | <# 2 | .SYNOPSIS 3 | Deploys a template to Azure 4 | 5 | .DESCRIPTION 6 | Deploys an Azure Resource Manager template 7 | 8 | .PARAMETER subscriptionId 9 | The subscription id where the template will be deployed. 10 | 11 | .PARAMETER resourceGroupName 12 | The resource group where the template will be deployed. Can be the name of an existing or a new resource group. 13 | 14 | .PARAMETER resourceGroupLocation 15 | Optional, a resource group location. If specified, will try to create a new resource group in this location. If not specified, assumes resource group is existing. 16 | 17 | .PARAMETER templateFilePath 18 | Optional, path to the template file. Defaults to template.json. 19 | 20 | .PARAMETER parametersFilePath 21 | Optional, path to the parameters file. Defaults to parameters.json. If file is not found, will prompt for parameter values based on template. 
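.EXAMPLE
.\deploy.ps1 -subscriptionId 00000000-0000-0000-0000-000000000000 -resourceGroupName databricks-labs -resourceGroupLocation eastus
Illustrative invocation with placeholder values; deploys template.json with parameters.json into the named resource group, creating the group in eastus if it does not already exist.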
22 | #> 23 | 24 | param( 25 | [Parameter(Mandatory=$True)] 26 | [string] 27 | $subscriptionId, 28 | 29 | [Parameter(Mandatory=$True)] 30 | [string] 31 | $resourceGroupName, 32 | 33 | [string] 34 | $resourceGroupLocation, 35 | 36 | [string] 37 | $templateFilePath = "template.json", 38 | 39 | [string] 40 | $parametersFilePath = "parameters.json" 41 | ) 42 | 43 | <# 44 | .SYNOPSIS 45 | Registers RPs 46 | #> 47 | Function RegisterRP { 48 | Param( 49 | [string]$ResourceProviderNamespace 50 | ) 51 | 52 | Write-Host "Registering resource provider '$ResourceProviderNamespace'"; 53 | Register-AzureRmResourceProvider -ProviderNamespace $ResourceProviderNamespace; 54 | } 55 | 56 | #****************************************************************************** 57 | # Script body 58 | # Execution begins here 59 | #****************************************************************************** 60 | $ErrorActionPreference = "Stop" 61 | 62 | # sign in 63 | Write-Host "Logging in..."; 64 | Login-AzureRmAccount; 65 | 66 | # select subscription 67 | Write-Host "Selecting subscription '$subscriptionId'"; 68 | Select-AzureRmSubscription -SubscriptionID $subscriptionId; 69 | 70 | # Register RPs 71 | $resourceProviders = @("microsoft.hdinsight"); 72 | if($resourceProviders.length) { 73 | Write-Host "Registering resource providers" 74 | foreach($resourceProvider in $resourceProviders) { 75 | RegisterRP($resourceProvider); 76 | } 77 | } 78 | 79 | #Create or check for existing resource group 80 | $resourceGroup = Get-AzureRmResourceGroup -Name $resourceGroupName -ErrorAction SilentlyContinue 81 | if(!$resourceGroup) 82 | { 83 | Write-Host "Resource group '$resourceGroupName' does not exist. To create a new resource group, please enter a location."; 84 | if(!$resourceGroupLocation) { 85 | $resourceGroupLocation = Read-Host "resourceGroupLocation"; 86 | } 87 | Write-Host "Creating resource group '$resourceGroupName' in location '$resourceGroupLocation'"; 88 | New-AzureRmResourceGroup -Name $resourceGroupName -Location $resourceGroupLocation 89 | } 90 | else{ 91 | Write-Host "Using existing resource group '$resourceGroupName'"; 92 | } 93 | 94 | # Start the deployment 95 | Write-Host "Starting deployment..."; 96 | if(Test-Path $parametersFilePath) { 97 | New-AzureRmResourceGroupDeployment -ResourceGroupName $resourceGroupName -TemplateFile $templateFilePath -TemplateParameterFile $parametersFilePath; 98 | } else { 99 | New-AzureRmResourceGroupDeployment -ResourceGroupName $resourceGroupName -TemplateFile $templateFilePath; 100 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Databricks Labs 2 | 3 | The following labs are currently available. Before attempting any of the labs, be sure you have followed the instructions in Lab Setup. When you are done with the labs, be sure to follow the instructions under Cleanup to delete your lab environment and avoid unneeded costs. 4 | 5 | ## Setup 6 | 7 | ### [Lab Setup](Setup/Environment-Setup.md) 8 | 9 | Follow the steps in this section to set up your environment to complete the labs. 10 | 11 | ## Labs 12 | 13 | ### [Lab 1 - Batch & ETL Processing of Big Data with Spark SQL](Labs/Lab01/Lab01.md) 14 | 15 | AdventureWorks is an e-commerce retailer who is looking to improve how they manage the data assets produced by their platform.
As a starting point, they would like to collect their data in a manner that enables easier exploration and prepares it for downstream analytics processes that can yield new insights. AdventureWorks has asked you to process and prepare their flat file data for weblogs, users, and products into a tabular format that offers better query performance and can be queried using SQL.
16 |
17 | In this lab, you will learn how to use Spark SQL (and PySpark) to batch process a 10 GB text file dataset: quickly explore its content, identify issues with the data, clean and format the data, and load it into global tables to support downstream analytics. You will also learn how to automate these steps using Azure Data Factory (ADF) and a Databricks Notebook activity.
18 |
19 | ### [Lab 2 - Data Warehouse Pattern - Interactive Querying with Spark and Power BI](Labs/Lab02/Lab02.md)
20 |
21 | AdventureWorks would like to create some visualizations of their data to better understand their customers. They are interested in using the powerful visualization capabilities of Power BI and its ability to share those visualizations, but aren't sure how to pull in the data to create the dashboards.
22 |
23 | They have provided all the weblogs, users, and product tables that you need to quickly explore the data. You will prepare the data for use in Power BI and explore it using Spark SQL and Databricks' built-in visualizations. Finally, you will import the data into Power BI Desktop to create interactive dashboards and reports.
24 |
25 | ### [Lab 3 - Data Science using Spark](Labs/Lab03/Lab03.md)
26 |
27 | AdventureWorks would like to add a snazzy product recommendations feature to their website and email marketing campaigns that, for every user in their system, can recommend the top 10 products they might be interested in purchasing. AdventureWorks has provided you with the users, products, and weblogs tables that contain all of the data you need.
28 |
29 | In this lab, you will train a recommendation model using Spark's built-in collaborative filtering algorithm, Alternating Least Squares (ALS). You will then use the model to pre-compute the user-to-product recommendations for every user and save them in a table. Finally, you will query this table to quickly get the 10 product recommendations for a given user.
30 |
31 | ### [Lab 4 - Streaming Pattern: Processing streaming events using Spark and MLlib](Labs/Lab04/Lab04.md)
32 |
33 | AdventureWorks has asked for the ability to extend their product recommendations feature, integrating the trained Alternating Least Squares (ALS) recommendation model to make predictions against streaming weblog data.
34 |
35 | In this lab, you will use Spark Structured Streaming to query the data and run the streamed data against the ALS recommendation model to get product recommendations for a given user.
36 |
37 | ## Cleanup
38 |
39 | ### [Lab Cleanup](Setup/Environment-Cleanup.md)
40 |
41 | When you are done with the labs, be sure to follow these instructions to clean up your Azure subscription and avoid unnecessary costs.
42 |
-------------------------------------------------------------------------------- /Setup/Scripts/Deploy-LabEnvironment.ps1: --------------------------------------------------------------------------------
1 | # -skipLab1 indicates that the files should be copied to each environment's
2 | # storage container, and that Azure Data Factory should not be provisioned.
3 | # This script has no -skipLab4 switch; to skip provisioning the HDInsight Kafka
4 | # cluster used by Lab 4, set the provisionKafka parameter to 'No' within the
5 | # azuredeploy.all.parameters.json file.
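# Example invocation (the values below are placeholders; see Setup/Environment-Setup.md
# for the full walkthrough this example is taken from):
#   .\Deploy-LabEnvironment.ps1 -subscriptionId "00000000-0000-0000-0000-000000000000" `
#       -resourceGroupName "DatabricksLabs-01" -workspaceName "databrickslabs1149" -workspaceCount 2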
6 | Param(
7 |     [string] [Parameter(Mandatory = $true)] $subscriptionId,
8 |     [string] [Parameter(Mandatory = $true)] $resourceGroupName,
9 |     [string] [Parameter(Mandatory = $true)] $workspaceName,
10 |     [string] [Parameter(Mandatory = $true)] $workspaceCount,
11 |     [string] $resourceGroupLocation = 'eastus',
12 |     [switch] $skipLab1
13 | )
14 |
15 | $destContainerName = "databricks-labs" # initial value; overwritten per-workspace in the copy loop below
16 | $sourceFolder = Get-Location
17 | $workspaceInstanceName = $workspaceName
18 | $resourceGroupInstanceName = $resourceGroupName
19 |
20 | $skipLab1String = "No"
21 | if ($skipLab1) {
22 |     $skipLab1String = "Yes"
23 | }
24 |
25 | # Increasing the console width to handle long string value output at end with Spark init info
26 | # $Host.UI.RawUI.BufferSize = New-Object Management.Automation.Host.Size(500, 25)
27 |
28 | Login-AzureRmAccount
29 |
30 | $sub = Select-AzureRmSubscription -SubscriptionId $subscriptionId
31 | Write-Host("Deploying instances with prefix " + $workspaceInstanceName + " in Resource Group " + $resourceGroupInstanceName + " in subscription " + $sub.Subscription.SubscriptionName + " (" + $sub.Subscription.SubscriptionId + ")")
32 | Set-Location $sourceFolder
33 | .\Deploy-AzureResourceGroup.ps1 -ResourceGroupName $resourceGroupInstanceName `
34 |     -ResourceGroupLocation $resourceGroupLocation `
35 |     -TemplateFile 'azuredeploy.all.json' `
36 |     -TemplateParametersFile 'azuredeploy.all.parameters.json' `
37 |     -workspaceName $workspaceInstanceName `
38 |     -workspaceCount $workspaceCount `
39 |     -SkipLab1 $skipLab1String
40 | $storageAccountName = $workspaceInstanceName
41 | Select-AzureRmSubscription -SubscriptionId $subscriptionId
42 |
43 | $storageKey = (Get-AzureRmStorageAccountKey -Name $storageAccountName -ResourceGroupName $resourceGroupInstanceName).Value[0]
44 |
45 | $sourceAccountName = "retaildatasamples"
46 | $sourceContainer = "data"
47 | $sourceSAS = "?sv=2017-04-17&ss=b&srt=co&sp=rl&se=2019-12-31T18:29:33Z&st=2017-09-18T10:29:33Z&spr=https&sig=bw1EJflDFx9NuvLRdBGql8RU%2FC9oz92Dz8Xs76cftJM%3D"
48 | $contextSource = New-AzureStorageContext -StorageAccountName $sourceAccountName -SasToken $sourceSAS
49 |
50 | Write-Host("Creating " + $workspaceCount + " storage containers. This can take a while.")
51 | $contextDest = New-AzureStorageContext -StorageAccountName $storageAccountName -StorageAccountKey $storageKey
52 | For ($i = 0; $i -lt $workspaceCount; $i++) {
53 |     $destContainerName = $workspaceName + $i
54 |     ### Create a Blob Container in the Storage Account
55 |     New-AzureStorageContainer -Context $contextDest -Name $destContainerName;
56 |
57 |     if ($skipLab1) {
58 |         # Copy blob files to the storage container if skipping Lab 1
59 |         Get-AzureStorageBlob -Context $contextSource -Container $sourceContainer -Blob "*.csv" | Start-AzureStorageBlobCopy -DestContext $contextDest -DestContainer $destContainerName
60 |         Get-AzureStorageBlob -Context $contextSource -Container $sourceContainer -Blob "*.txt" | Start-AzureStorageBlobCopy -DestContext $contextDest -DestContainer $destContainerName
61 |     }
62 |
63 |     Write-Host("Copy the following to your Databricks cluster init configuration (blob files will be located in a container named '" + $destContainerName + "'):")
64 |     Write-Host("spark.hadoop.fs.azure.account.key."
+ $storageAccountName + ".blob.core.windows.net " + $storageKey) -ForegroundColor Cyan 65 | } 66 | Write-Host("Storage container creation complete, and deployment operations are finished. If there are no errors, you are free to begin using the workspace and clusters.") -ForegroundColor Green -------------------------------------------------------------------------------- /Setup/Template/template.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://schema.management.azure.com/schemas/2014-04-01-preview/deploymentTemplate.json#", 3 | "contentVersion": "0.9.0.0", 4 | "parameters": { 5 | "clusterName": { 6 | "type": "string", 7 | "metadata": { 8 | "description": "The name of the HDInsight cluster to create." 9 | } 10 | }, 11 | "clusterLoginUserName": { 12 | "type": "string", 13 | "defaultValue": "admin", 14 | "metadata": { 15 | "description": "These credentials can be used to submit jobs to the cluster and to log into cluster dashboards." 16 | } 17 | }, 18 | "clusterLoginPassword": { 19 | "type": "securestring", 20 | "metadata": { 21 | "description": "The password must be at least 10 characters in length and must contain at least one digit, one non-alphanumeric character, and one upper or lower case letter." 22 | } 23 | }, 24 | "location": { 25 | "type": "string", 26 | "defaultValue": "westus2", 27 | "metadata": { 28 | "description": "The location where all azure resources will be deployed." 29 | } 30 | }, 31 | "clusterVersion": { 32 | "type": "string", 33 | "defaultValue": "3.6", 34 | "metadata": { 35 | "description": "HDInsight cluster version." 36 | } 37 | }, 38 | "clusterWorkerNodeCount": { 39 | "type": "int", 40 | "defaultValue": 2, 41 | "metadata": { 42 | "description": "The number of nodes in the HDInsight cluster." 43 | } 44 | }, 45 | "clusterKind": { 46 | "type": "string", 47 | "defaultValue": "SPARK", 48 | "metadata": { 49 | "description": "The type of the HDInsight cluster to create." 50 | } 51 | }, 52 | "sshUserName": { 53 | "type": "string", 54 | "defaultValue": "sshuser", 55 | "metadata": { 56 | "description": "These credentials can be used to remotely access the cluster." 57 | } 58 | }, 59 | "sshPassword": { 60 | "type": "securestring", 61 | "metadata": { 62 | "description": "The password must be at least 10 characters in length and must contain at least one digit, one non-alphanumeric character, and one upper or lower case letter." 
63 |             }
64 |         }
65 |     },
66 |     "resources": [
67 |         {
68 |             "apiVersion": "2015-03-01-preview",
69 |             "name": "[parameters('clusterName')]",
70 |             "type": "Microsoft.HDInsight/clusters",
71 |             "location": "[parameters('location')]",
72 |             "dependsOn": [],
73 |             "properties": {
74 |                 "clusterVersion": "[parameters('clusterVersion')]",
75 |                 "osType": "Linux",
76 |                 "tier": "standard",
77 |                 "clusterDefinition": {
78 |                     "kind": "[parameters('clusterKind')]",
79 |                     "configurations": {
80 |                         "gateway": {
81 |                             "restAuthCredential.isEnabled": true,
82 |                             "restAuthCredential.username": "[parameters('clusterLoginUserName')]",
83 |                             "restAuthCredential.password": "[parameters('clusterLoginPassword')]"
84 |                         }
85 |                     }
86 |                 },
87 |                 "storageProfile": {
88 |                     "storageaccounts": [
89 |                         {
90 |                             "name": "hdilabs20170914ignite01.blob.core.windows.net",
91 |                             "isDefault": true,
92 |                             "container": "hdi-labs",
93 |                             "key": "leAUY1bGNNFBiQq9JCNuc9J52DgOOikQtwano1EBANy4ttgNDCHT/RVUrIBkWTtl+SsPS6lss1IfqXMpAIi0UA=="
94 |                         }
95 |                     ]
96 |                 },
97 |                 "computeProfile": {
98 |                     "roles": [
99 |                         {
100 |                             "name": "headnode",
101 |                             "minInstanceCount": 1,
102 |                             "targetInstanceCount": 2,
103 |                             "hardwareProfile": {
104 |                                 "vmSize": "Standard_D12_V2"
105 |                             },
106 |                             "osProfile": {
107 |                                 "linuxOperatingSystemProfile": {
108 |                                     "username": "[parameters('sshUserName')]",
109 |                                     "password": "[parameters('sshPassword')]"
110 |                                 }
111 |                             },
112 |                             "virtualNetworkProfile": null,
113 |                             "scriptActions": []
114 |                         },
115 |                         {
116 |                             "name": "workernode",
117 |                             "minInstanceCount": 1,
118 |                             "targetInstanceCount": "[parameters('clusterWorkerNodeCount')]",
119 |                             "hardwareProfile": {
120 |                                 "vmSize": "Standard_D12_V2"
121 |                             },
122 |                             "osProfile": {
123 |                                 "linuxOperatingSystemProfile": {
124 |                                     "username": "[parameters('sshUserName')]",
125 |                                     "password": "[parameters('sshPassword')]"
126 |                                 }
127 |                             },
128 |                             "virtualNetworkProfile": null,
129 |                             "scriptActions": []
130 |                         }
131 |                     ]
132 |                 }
133 |             }
134 |         }
135 |     ]
136 | }
-------------------------------------------------------------------------------- /Setup/Deployment/HDInsightLabsEnvironment/HDInsightLabsEnvironment/Deployment.targets: --------------------------------------------------------------------------------
(The XML markup of this MSBuild targets file was lost in extraction; only element text survives. The recoverable values are the defaults Configuration=Debug, Platform=AnyCPU, OutputPath=bin\$(Configuration)\, intermediate output under obj\, and the content-staging target chain — _GetDeploymentProjectContent, _CalculateContentOutputRelativePaths, _GetReferencedProjectsOutput, _CalculateArtifactStagingDirectory, _CopyOutputToArtifactStagingDirectory — which copies deployment project content and referenced project output (built with Configuration=$(Configuration);Platform=$(Platform)) to $(ArtifactStagingDirectory)staging\ as part of PrepareForRun.)
-------------------------------------------------------------------------------- /Setup/Template/DeploymentHelper.cs: --------------------------------------------------------------------------------
1 | // Requires the following Azure NuGet packages and related dependencies:
2 | // package id="Microsoft.Azure.Management.Authorization" version="2.0.0"
3 | // package id="Microsoft.Azure.Management.ResourceManager" version="1.4.0-preview"
4 | // package id="Microsoft.Rest.ClientRuntime.Azure.Authentication" version="2.2.8-preview"
5 |
6 | using Microsoft.Azure.Management.ResourceManager;
7 | using Microsoft.Azure.Management.ResourceManager.Models;
8 | using Microsoft.Rest.Azure.Authentication;
9 | using Newtonsoft.Json;
10 | using Newtonsoft.Json.Linq;
11 | using System;
12 | using System.IO;
13 |
14 | namespace PortalGenerated
15 | {
16 |     /// <summary>
17 |     /// This is a helper class for deploying an Azure Resource Manager template
18 |     /// More info about template deployments can be found here https://go.microsoft.com/fwLink/?LinkID=733371
19 |     /// </summary>
20 |     class DeploymentHelper
21 |     {
22 |         string subscriptionId = "your-subscription-id";
23 |         string clientId = "your-service-principal-clientId";
24 |         string clientSecret = "your-service-principal-client-secret";
25 |         string resourceGroupName = "resource-group-name";
26 |         string deploymentName = "deployment-name";
27 |         string resourceGroupLocation = "resource-group-location"; // must be specified for creating a new resource group
28 |         string pathToTemplateFile = "path-to-template.json-on-disk";
29 |         string pathToParameterFile = "path-to-parameters.json-on-disk";
30 |         string tenantId = "tenant-id";
31 |
32 |         public async void Run()
33 |         {
34 |             // Try to obtain the service credentials
35 |             var serviceCreds = await ApplicationTokenProvider.LoginSilentAsync(tenantId, clientId, clientSecret);
36 |
37 |             // Read the template and parameter file contents
38 |             JObject templateFileContents = GetJsonFileContents(pathToTemplateFile);
39 |             JObject parameterFileContents = GetJsonFileContents(pathToParameterFile);
40 |
41 |             // Create the resource manager client
42 |             var resourceManagementClient = new ResourceManagementClient(serviceCreds);
43 |             resourceManagementClient.SubscriptionId = subscriptionId;
44 |
45 |             // Create or check that resource group exists
46 |             EnsureResourceGroupExists(resourceManagementClient, resourceGroupName, resourceGroupLocation);
47 |
48 |             // Start a deployment
49 |             DeployTemplate(resourceManagementClient, resourceGroupName, deploymentName, templateFileContents, parameterFileContents);
50 |         }
51 |
52 |         /// <summary>
53 |         /// Reads a JSON file from the specified path
54 |         /// </summary>
55 |         /// <param name="pathToJson">The full path to the JSON file</param>
56 |         /// <returns>The JSON file contents</returns>
57 |         private JObject GetJsonFileContents(string pathToJson)
58 |         {
59 |             JObject templatefileContent = new JObject();
60 |             using (StreamReader file = File.OpenText(pathToJson))
61 |             {
62 |                 using (JsonTextReader reader = new JsonTextReader(file))
63 |                 {
64 |                     templatefileContent = (JObject)JToken.ReadFrom(reader);
65 |                     return templatefileContent;
66 |                 }
67 |             }
68 |         }
69 |
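        // A hypothetical usage sketch (not part of the original sample): assign real
        // values to the fields above, then call Run() from your entry point, e.g.
        //   var helper = new DeploymentHelper();
        //   helper.Run();
        // Note that Run() is declared async void, so the call returns immediately and
        // cannot be awaited; the deployment proceeds in the background.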
70 |         /// <summary>
71 |         /// Ensures that a resource group with the specified name exists. If it does not, will attempt to create one.
72 |         /// </summary>
73 |         /// <param name="resourceManagementClient">The resource manager client.</param>
74 |         /// <param name="resourceGroupName">The name of the resource group.</param>
75 |         /// <param name="resourceGroupLocation">The resource group location. Required when creating a new resource group.</param>
76 |         private static void EnsureResourceGroupExists(ResourceManagementClient resourceManagementClient, string resourceGroupName, string resourceGroupLocation)
77 |         {
78 |             if (resourceManagementClient.ResourceGroups.CheckExistence(resourceGroupName) != true)
79 |             {
80 |                 Console.WriteLine(string.Format("Creating resource group '{0}' in location '{1}'", resourceGroupName, resourceGroupLocation));
81 |                 var resourceGroup = new ResourceGroup();
82 |                 resourceGroup.Location = resourceGroupLocation;
83 |                 resourceManagementClient.ResourceGroups.CreateOrUpdate(resourceGroupName, resourceGroup);
84 |             }
85 |             else
86 |             {
87 |                 Console.WriteLine(string.Format("Using existing resource group '{0}'", resourceGroupName));
88 |             }
89 |         }
90 |
91 |         /// <summary>
92 |         /// Starts a template deployment.
93 |         /// </summary>
94 |         /// <param name="resourceManagementClient">The resource manager client.</param>
95 |         /// <param name="resourceGroupName">The name of the resource group.</param>
96 |         /// <param name="deploymentName">The name of the deployment.</param>
97 |         /// <param name="templateFileContents">The template file contents.</param>
98 |         /// <param name="parameterFileContents">The parameter file contents.</param>
99 |         private static void DeployTemplate(ResourceManagementClient resourceManagementClient, string resourceGroupName, string deploymentName, JObject templateFileContents, JObject parameterFileContents)
100 |         {
101 |             Console.WriteLine(string.Format("Starting template deployment '{0}' in resource group '{1}'", deploymentName, resourceGroupName));
102 |             var deployment = new Deployment();
103 |
104 |             deployment.Properties = new DeploymentProperties
105 |             {
106 |                 Mode = DeploymentMode.Incremental,
107 |                 Template = templateFileContents,
108 |                 Parameters = parameterFileContents["parameters"].ToObject<JObject>()
109 |             };
110 |
111 |             var deploymentResult = resourceManagementClient.Deployments.CreateOrUpdate(resourceGroupName, deploymentName, deployment);
112 |             Console.WriteLine(string.Format("Deployment status: {0}", deploymentResult.Properties.ProvisioningState));
113 |         }
114 |     }
115 | }
-------------------------------------------------------------------------------- /Setup/Deployment/HDInsightLabsEnvironment/HDInsightLabsEnvironment/bin/Debug/staging/HDInsightLabsEnvironment/azuredeploy.json: --------------------------------------------------------------------------------
1 | {
2 |     "$schema": "http://schema.management.azure.com/schemas/2014-04-01-preview/deploymentTemplate.json#",
3 |     "contentVersion": "0.9.0.0",
4 |     "parameters": {
5 |         "clusterAttachedStorageType": {
6 |             "type": "string",
7 |             "defaultValue": "Standard_LRS",
8 |             "allowedValues": [
9 |                 "Standard_LRS",
10 |                 "Standard_ZRS",
11 |                 "Standard_GRS",
12 |                 "Standard_RAGRS",
13 |                 "Premium_LRS"
14 |             ]
15 |         },
16 |         "clusterName": {
17 |             "type": "string",
18 |             "metadata": {
19 |                 "description": "The name of the HDInsight cluster to create."
20 |             }
21 |         },
22 |         "clusterLoginUserName": {
23 |             "type": "string",
24 |             "defaultValue": "admin",
25 |             "metadata": {
26 |                 "description": "These credentials can be used to submit jobs to the cluster and to log into cluster dashboards."
27 |             }
28 |         },
29 |         "clusterLoginPassword": {
30 |             "type": "securestring",
31 |             "metadata": {
32 |                 "description": "The password must be at least 10 characters in length and must contain at least one digit, one non-alphanumeric character, and one upper or lower case letter."
33 |             }
34 |         },
35 |         "location": {
36 |             "type": "string",
37 |             "defaultValue": "westus2",
38 |             "metadata": {
39 |                 "description": "The location where all azure resources will be deployed."
40 |             }
41 |         },
42 |         "clusterVersion": {
43 |             "type": "string",
44 |             "defaultValue": "3.6",
45 |             "metadata": {
46 |                 "description": "HDInsight cluster version."
47 | } 48 | }, 49 | "clusterWorkerNodeCount": { 50 | "type": "int", 51 | "defaultValue": 2, 52 | "metadata": { 53 | "description": "The number of nodes in the HDInsight cluster." 54 | } 55 | }, 56 | "clusterKind": { 57 | "type": "string", 58 | "defaultValue": "SPARK", 59 | "metadata": { 60 | "description": "The type of the HDInsight cluster to create." 61 | } 62 | }, 63 | "sshUserName": { 64 | "type": "string", 65 | "defaultValue": "sshuser", 66 | "metadata": { 67 | "description": "These credentials can be used to remotely access the cluster." 68 | } 69 | }, 70 | "sshPassword": { 71 | "type": "securestring", 72 | "metadata": { 73 | "description": "The password must be at least 10 characters in length and must contain at least one digit, one non-alphanumeric character, and one upper or lower case letter." 74 | } 75 | } 76 | }, 77 | "variables": { 78 | "defaultApiVersion": "2015-05-01-preview", 79 | "clusterAttachedStorageName": "[parameters('clusterName')]", 80 | "clusterAttachedStorageResourcePath": "/subscriptions/30fc406c-c745-44f0-be2d-63b1c860cde0/resourceGroups/hdilabs/providers/Microsoft.Storage/storageAccounts/solliancedemodata" 81 | }, 82 | "resources": [ 83 | { 84 | "name": "[variables('clusterAttachedStorageName')]", 85 | "type": "Microsoft.Storage/storageAccounts", 86 | "location": "[parameters('location')]", 87 | "apiVersion": "2016-01-01", 88 | "sku": { 89 | "name": "[parameters('clusterAttachedStorageType')]" 90 | }, 91 | "dependsOn": [], 92 | "tags": { 93 | "displayName": "clusterattachedstorage" 94 | }, 95 | "kind": "Storage" 96 | }, 97 | { 98 | "apiVersion": "2015-03-01-preview", 99 | "name": "[parameters('clusterName')]", 100 | "type": "Microsoft.HDInsight/clusters", 101 | "location": "[parameters('location')]", 102 | "dependsOn": [ "[resourceId('Microsoft.Storage/storageAccounts', variables('clusterAttachedStorageName'))]" ], 103 | "properties": { 104 | "clusterVersion": "[parameters('clusterVersion')]", 105 | "osType": "Linux", 106 | "tier": "standard", 107 | "clusterDefinition": { 108 | "kind": "[parameters('clusterKind')]", 109 | "configurations": { 110 | "gateway": { 111 | "restAuthCredential.isEnabled": true, 112 | "restAuthCredential.username": "[parameters('clusterLoginUserName')]", 113 | "restAuthCredential.password": "[parameters('clusterLoginPassword')]" 114 | } 115 | } 116 | }, 117 | "storageProfile": { 118 | "storageaccounts": [ 119 | { 120 | "name": "[concat(variables('clusterAttachedStorageName'),'.blob.core.windows.net')]", 121 | "isDefault": true, 122 | "container": "hdi-labs", 123 | "key": "[listKeys(resourceId('Microsoft.Storage/storageAccounts', variables('clusterAttachedStorageName')), variables('defaultApiVersion')).key1]" 124 | } 125 | ] 126 | }, 127 | "computeProfile": { 128 | "roles": [ 129 | { 130 | "name": "headnode", 131 | "minInstanceCount": 1, 132 | "targetInstanceCount": 2, 133 | "hardwareProfile": { 134 | "vmSize": "Standard_D12_V2" 135 | }, 136 | "osProfile": { 137 | "linuxOperatingSystemProfile": { 138 | "username": "[parameters('sshUserName')]", 139 | "password": "[parameters('sshPassword')]" 140 | } 141 | }, 142 | "virtualNetworkProfile": null, 143 | "scriptActions": [] 144 | }, 145 | { 146 | "name": "workernode", 147 | "minInstanceCount": 1, 148 | "targetInstanceCount": 2, 149 | "hardwareProfile": { 150 | "vmSize": "Standard_D12_V2" 151 | }, 152 | "osProfile": { 153 | "linuxOperatingSystemProfile": { 154 | "username": "[parameters('sshUserName')]", 155 | "password": "[parameters('sshPassword')]" 156 | } 157 | }, 158 | 
"virtualNetworkProfile": null, 159 | "scriptActions": [] 160 | } 161 | ] 162 | } 163 | } 164 | } 165 | ], 166 | "outputs": {} 167 | } 168 | -------------------------------------------------------------------------------- /Setup/Deployment/HDInsightLabsEnvironment/HDInsightLabsEnvironment/azuredeploy.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://schema.management.azure.com/schemas/2014-04-01-preview/deploymentTemplate.json#", 3 | "contentVersion": "0.9.0.0", 4 | "parameters": { 5 | "clusterAttachedStorageType": { 6 | "type": "string", 7 | "defaultValue": "Standard_LRS", 8 | "allowedValues": [ 9 | "Standard_LRS", 10 | "Standard_ZRS", 11 | "Standard_GRS", 12 | "Standard_RAGRS", 13 | "Premium_LRS" 14 | ] 15 | }, 16 | "clusterName": { 17 | "type": "string", 18 | "metadata": { 19 | "description": "The name of the HDInsight cluster to create." 20 | } 21 | }, 22 | "clusterLoginUserName": { 23 | "type": "string", 24 | "defaultValue": "admin", 25 | "metadata": { 26 | "description": "These credentials can be used to submit jobs to the cluster and to log into cluster dashboards." 27 | } 28 | }, 29 | "clusterLoginPassword": { 30 | "type": "securestring", 31 | "metadata": { 32 | "description": "The password must be at least 10 characters in length and must contain at least one digit, one non-alphanumeric character, and one upper or lower case letter." 33 | } 34 | }, 35 | "location": { 36 | "type": "string", 37 | "defaultValue": "westus2", 38 | "metadata": { 39 | "description": "The location where all azure resources will be deployed." 40 | } 41 | }, 42 | "clusterVersion": { 43 | "type": "string", 44 | "defaultValue": "3.6", 45 | "metadata": { 46 | "description": "HDInsight cluster version." 47 | } 48 | }, 49 | "clusterWorkerNodeCount": { 50 | "type": "int", 51 | "defaultValue": 2, 52 | "metadata": { 53 | "description": "The number of nodes in the HDInsight cluster." 54 | } 55 | }, 56 | "clusterKind": { 57 | "type": "string", 58 | "defaultValue": "SPARK", 59 | "metadata": { 60 | "description": "The type of the HDInsight cluster to create." 61 | } 62 | }, 63 | "sshUserName": { 64 | "type": "string", 65 | "defaultValue": "sshuser", 66 | "metadata": { 67 | "description": "These credentials can be used to remotely access the cluster." 68 | } 69 | }, 70 | "sshPassword": { 71 | "type": "securestring", 72 | "metadata": { 73 | "description": "The password must be at least 10 characters in length and must contain at least one digit, one non-alphanumeric character, and one upper or lower case letter." 74 | } 75 | }, 76 | "clusterCount": { 77 | "type": "int", 78 | "defaultValue": 1, 79 | "metadata": { 80 | "description": "The number of HDInsight clusters to deploy." 
81 | } 82 | } 83 | }, 84 | "variables": { 85 | "defaultApiVersion": "2015-05-01-preview", 86 | "clusterAttachedStorageName": "[parameters('clusterName')]" 87 | }, 88 | "resources": [ 89 | { 90 | "name": "[variables('clusterAttachedStorageName')]", 91 | "type": "Microsoft.Storage/storageAccounts", 92 | "location": "[parameters('location')]", 93 | "apiVersion": "2016-01-01", 94 | "sku": { 95 | "name": "[parameters('clusterAttachedStorageType')]" 96 | }, 97 | "dependsOn": [], 98 | "tags": { 99 | "displayName": "clusterattachedstorage" 100 | }, 101 | "kind": "Storage" 102 | }, 103 | { 104 | "apiVersion": "2015-03-01-preview", 105 | "name": "[concat(parameters('clusterName'), copyIndex())]", 106 | "copy": { 107 | "name": "hdinsightcopy", 108 | "count": "[parameters('clusterCount')]" 109 | }, 110 | "type": "Microsoft.HDInsight/clusters", 111 | "location": "[parameters('location')]", 112 | "dependsOn": [ "[resourceId('Microsoft.Storage/storageAccounts', variables('clusterAttachedStorageName'))]" ], 113 | "properties": { 114 | "clusterVersion": "[parameters('clusterVersion')]", 115 | "osType": "Linux", 116 | "tier": "standard", 117 | "clusterDefinition": { 118 | "kind": "[parameters('clusterKind')]", 119 | "configurations": { 120 | "gateway": { 121 | "restAuthCredential.isEnabled": true, 122 | "restAuthCredential.username": "[parameters('clusterLoginUserName')]", 123 | "restAuthCredential.password": "[parameters('clusterLoginPassword')]" 124 | } 125 | } 126 | }, 127 | "storageProfile": { 128 | "storageaccounts": [ 129 | { 130 | "name": "[concat(variables('clusterAttachedStorageName'),'.blob.core.windows.net')]", 131 | "isDefault": true, 132 | "container": "[concat(parameters('clusterName'), copyIndex())]", 133 | "key": "[listKeys(resourceId('Microsoft.Storage/storageAccounts', variables('clusterAttachedStorageName')), variables('defaultApiVersion')).key1]" 134 | } 135 | ] 136 | }, 137 | "computeProfile": { 138 | "roles": [ 139 | { 140 | "name": "headnode", 141 | "minInstanceCount": 1, 142 | "targetInstanceCount": 2, 143 | "hardwareProfile": { 144 | "vmSize": "Standard_D12_V2" 145 | }, 146 | "osProfile": { 147 | "linuxOperatingSystemProfile": { 148 | "username": "[parameters('sshUserName')]", 149 | "password": "[parameters('sshPassword')]" 150 | } 151 | }, 152 | "virtualNetworkProfile": null, 153 | "scriptActions": [] 154 | }, 155 | { 156 | "name": "workernode", 157 | "minInstanceCount": 1, 158 | "targetInstanceCount": 2, 159 | "hardwareProfile": { 160 | "vmSize": "Standard_D12_V2" 161 | }, 162 | "osProfile": { 163 | "linuxOperatingSystemProfile": { 164 | "username": "[parameters('sshUserName')]", 165 | "password": "[parameters('sshPassword')]" 166 | } 167 | }, 168 | "virtualNetworkProfile": null, 169 | "scriptActions": [] 170 | } 171 | ] 172 | } 173 | } 174 | } 175 | ], 176 | "outputs": {} 177 | } 178 | -------------------------------------------------------------------------------- /Setup/Scripts/Deploy-AzureResourceGroup.ps1: -------------------------------------------------------------------------------- 1 | #Requires -Version 3.0 2 | 3 | Param( 4 | [string] [Parameter(Mandatory = $true)] $ResourceGroupLocation, 5 | [string] [Parameter(Mandatory = $true)] $WorkspaceName, 6 | [string] $ResourceGroupName = 'DatabricksLabsEnvironment', 7 | [string] $WorkspaceCount = '1', 8 | [switch] $UploadArtifacts, 9 | [string] $StorageAccountName, 10 | [string] $StorageContainerName = $ResourceGroupName.ToLowerInvariant() + '-stageartifacts', 11 | [string] $TemplateFile = 'azuredeploy.json', 12 | [string] 
$TemplateParametersFile = 'azuredeploy.parameters.json', 13 | [string] $ArtifactStagingDirectory = '.', 14 | [string] $DSCSourceFolder = 'DSC', 15 | [switch] $ValidateOnly, 16 | [string] $SkipLab1String 17 | ) 18 | 19 | try { 20 | [Microsoft.Azure.Common.Authentication.AzureSession]::ClientFactory.AddUserAgent("VSAzureTools-$UI$($host.name)".replace(' ', '_'), '3.0.0') 21 | } 22 | catch { } 23 | 24 | $ErrorActionPreference = 'Stop' 25 | Set-StrictMode -Version 3 26 | 27 | function Format-ValidationOutput { 28 | param ($ValidationOutput, [int] $Depth = 0) 29 | Set-StrictMode -Off 30 | return @($ValidationOutput | Where-Object { $_ -ne $null } | ForEach-Object { @(' ' * $Depth + ': ' + $_.Message) + @(Format-ValidationOutput @($_.Details) ($Depth + 1)) }) 31 | } 32 | 33 | $OptionalParameters = New-Object -TypeName Hashtable 34 | $TemplateFile = [System.IO.Path]::GetFullPath([System.IO.Path]::Combine($PSScriptRoot, $TemplateFile)) 35 | $TemplateParametersFile = [System.IO.Path]::GetFullPath([System.IO.Path]::Combine($PSScriptRoot, $TemplateParametersFile)) 36 | 37 | if ($UploadArtifacts) { 38 | # Convert relative paths to absolute paths if needed 39 | $ArtifactStagingDirectory = [System.IO.Path]::GetFullPath([System.IO.Path]::Combine($PSScriptRoot, $ArtifactStagingDirectory)) 40 | $DSCSourceFolder = [System.IO.Path]::GetFullPath([System.IO.Path]::Combine($PSScriptRoot, $DSCSourceFolder)) 41 | 42 | # Parse the parameter file and update the values of artifacts location and artifacts location SAS token if they are present 43 | $JsonParameters = Get-Content $TemplateParametersFile -Raw | ConvertFrom-Json 44 | if (($JsonParameters | Get-Member -Type NoteProperty 'parameters') -ne $null) { 45 | $JsonParameters = $JsonParameters.parameters 46 | } 47 | $ArtifactsLocationName = '_artifactsLocation' 48 | $ArtifactsLocationSasTokenName = '_artifactsLocationSasToken' 49 | $OptionalParameters[$ArtifactsLocationName] = $JsonParameters | Select -Expand $ArtifactsLocationName -ErrorAction Ignore | Select -Expand 'value' -ErrorAction Ignore 50 | $OptionalParameters[$ArtifactsLocationSasTokenName] = $JsonParameters | Select -Expand $ArtifactsLocationSasTokenName -ErrorAction Ignore | Select -Expand 'value' -ErrorAction Ignore 51 | 52 | # Create DSC configuration archive 53 | if (Test-Path $DSCSourceFolder) { 54 | $DSCSourceFilePaths = @(Get-ChildItem $DSCSourceFolder -File -Filter '*.ps1' | ForEach-Object -Process {$_.FullName}) 55 | foreach ($DSCSourceFilePath in $DSCSourceFilePaths) { 56 | $DSCArchiveFilePath = $DSCSourceFilePath.Substring(0, $DSCSourceFilePath.Length - 4) + '.zip' 57 | Publish-AzureRmVMDscConfiguration $DSCSourceFilePath -OutputArchivePath $DSCArchiveFilePath -Force -Verbose 58 | } 59 | } 60 | 61 | # Create a storage account name if none was provided 62 | if ($StorageAccountName -eq '') { 63 | $StorageAccountName = 'stage' + ((Get-AzureRmContext).Subscription.SubscriptionId).Replace('-', '').substring(0, 19) 64 | } 65 | 66 | $StorageAccount = (Get-AzureRmStorageAccount | Where-Object {$_.StorageAccountName -eq $StorageAccountName}) 67 | 68 | # Create the storage account if it doesn't already exist 69 | if ($StorageAccount -eq $null) { 70 | $StorageResourceGroupName = 'ARM_Deploy_Staging' 71 | New-AzureRmResourceGroup -Location "$ResourceGroupLocation" -Name $StorageResourceGroupName -Force 72 | $StorageAccount = New-AzureRmStorageAccount -StorageAccountName $StorageAccountName -Type 'Standard_LRS' -ResourceGroupName $StorageResourceGroupName -Location "$ResourceGroupLocation" 73 | } 74 | 75 
| # Generate the value for artifacts location if it is not provided in the parameter file
76 |     if ($OptionalParameters[$ArtifactsLocationName] -eq $null) {
77 |         $OptionalParameters[$ArtifactsLocationName] = $StorageAccount.Context.BlobEndPoint + $StorageContainerName
78 |     }
79 |
80 |     # Copy files from the local storage staging location to the storage account container
81 |     New-AzureStorageContainer -Name $StorageContainerName -Context $StorageAccount.Context -ErrorAction SilentlyContinue *>&1
82 |
83 |     $ArtifactFilePaths = Get-ChildItem $ArtifactStagingDirectory -Recurse -File | ForEach-Object -Process {$_.FullName}
84 |     foreach ($SourcePath in $ArtifactFilePaths) {
85 |         Set-AzureStorageBlobContent -File $SourcePath -Blob $SourcePath.Substring($ArtifactStagingDirectory.length + 1) `
86 |             -Container $StorageContainerName -Context $StorageAccount.Context -Force
87 |     }
88 |
89 |     # Generate a 4 hour SAS token for the artifacts location if one was not provided in the parameters file
90 |     if ($OptionalParameters[$ArtifactsLocationSasTokenName] -eq $null) {
91 |         $OptionalParameters[$ArtifactsLocationSasTokenName] = ConvertTo-SecureString -AsPlainText -Force `
92 |             (New-AzureStorageContainerSASToken -Container $StorageContainerName -Context $StorageAccount.Context -Permission r -ExpiryTime (Get-Date).AddHours(4))
93 |     }
94 | }
95 |
96 | # Create or update the resource group using the specified template file and template parameters file
97 | New-AzureRmResourceGroup -Name $ResourceGroupName -Location $ResourceGroupLocation -Verbose -Force
98 |
99 | if ($ValidateOnly) {
100 |     $ErrorMessages = Format-ValidationOutput (Test-AzureRmResourceGroupDeployment -ResourceGroupName $ResourceGroupName `
101 |         -TemplateFile $TemplateFile `
102 |         -TemplateParameterFile $TemplateParametersFile `
103 |         @OptionalParameters)
104 |     if ($ErrorMessages) {
105 |         Write-Output '', 'Validation returned the following errors:', @($ErrorMessages), '', 'Template is invalid.'
106 |     }
107 |     else {
108 |         Write-Output '', 'Template is valid.'
109 |     }
110 | }
111 | else {
112 |     New-AzureRmResourceGroupDeployment -Name ((Get-ChildItem $TemplateFile).BaseName + '-' + ((Get-Date).ToUniversalTime()).ToString('MMdd-HHmm')) `
113 |         -ResourceGroupName $ResourceGroupName `
114 |         -TemplateFile $TemplateFile `
115 |         -TemplateParameterFile $TemplateParametersFile `
116 |         -workspaceName $WorkspaceName `
117 |         -workspaceCount $WorkspaceCount `
118 |         -location $ResourceGroupLocation `
119 |         -skipLab1 $SkipLab1String `
120 |         @OptionalParameters `
121 |         -Force -Verbose `
122 |         -ErrorVariable ErrorMessages
123 |     if ($ErrorMessages) {
124 |         Write-Output '', 'Template deployment returned the following errors:', @(@($ErrorMessages) | ForEach-Object { $_.Exception.Message.TrimEnd("`r`n") })
125 |     }
126 | }
-------------------------------------------------------------------------------- /Setup/Environment-Setup.md: --------------------------------------------------------------------------------
1 | # Environment Setup
2 |
3 | This article describes the steps required to set up the environment in order to conduct the labs.
4 |
5 | ## 1. Deploy the Environment: Databricks Workspace, Azure Data Factory v2, Attached Storage Accounts, and Sample Data
6 |
7 | An ARM template and script are provided to aid in provisioning the environments for attendees to use. Follow these steps to deploy your environment:
8 |
9 | 1. Open PowerShell and run the following command to log in to your Azure account:
10 |
11 | ```PowerShell
12 | Login-AzureRmAccount
13 | ```
14 |
15 | 1. If you have more than one Azure subscription, execute the following to view your list of available subscriptions:
16 |
17 | ```PowerShell
18 | Get-AzureRmSubscription
19 | ```
20 |
21 | 1. Execute the following to set the subscription to the appropriate one, if needed:
22 |
23 | ```PowerShell
24 | Select-AzureRmSubscription -SubscriptionName "[SubscriptionName]"
25 | ```
26 |
27 | 1. Confirm the selected subscription:
28 |
29 | ```PowerShell
30 | (Get-AzureRmContext).Subscription
31 | ```
32 |
33 | 1. In PowerShell, navigate to the Setup\Scripts folder.
34 | 1. Next, you will execute the `.\Deploy-LabEnvironment.ps1` PowerShell script, passing in the following parameters:
35 |
36 |     1. subscriptionId (Mandatory)
37 |     2. resourceGroupName (Mandatory)
38 |     3. workspaceName (Mandatory)
39 |     4. workspaceCount (Mandatory)
40 |     5. resourceGroupLocation (Default value = 'eastus')
41 |     6. skipLab1 (optional switch)
42 |
43 | 1. Run the following command to provision the workspace (be sure to provide a unique workspace name):
44 |
45 | ```PowerShell
46 | .\Deploy-LabEnvironment.ps1 -subscriptionId "[subscriptionID]" -resourceGroupName "[newResourceGroupName]" -workspaceName "[workspaceNamePrefix]" -workspaceCount 1 -resourceGroupLocation "[location]"
47 | ```
48 |
49 | For example, the following creates the environment in the East US location, where 2 workspaces are created sharing one storage account (each will have its own container in that storage account):
50 |
51 | ```PowerShell
52 | .\Deploy-LabEnvironment.ps1 -subscriptionId "40fc406c-c745-44f0-be2d-63b1c860cde0" -resourceGroupName "DatabricksLabs-01" -workspaceName "databrickslabs1149" -workspaceCount 2
53 | ```
54 |
55 | **NOTE:** You may need to relax your PowerShell execution policy to execute this script. To do so, before running the above, first run:
56 | `Set-ExecutionPolicy -ExecutionPolicy Unrestricted`
57 |
58 | **NOTE:** If you are skipping Lab 1, you will need to provide the `-skipLab1` switch to the PowerShell command. This will copy the lab files to the default Azure Storage account so you can successfully complete the other labs. This is because Azure Data Factory is used to copy the files as part of Lab 1. When you specify `-skipLab1`, Azure Data Factory is not provisioned.
59 | Example:
60 |
61 | ```PowerShell
62 | .\Deploy-LabEnvironment.ps1 -subscriptionId "[subscriptionID]" -resourceGroupName "[newResourceGroupName]" -workspaceName "[workspaceNamePrefix]" -workspaceCount 1 -resourceGroupLocation "[location]" -skipLab1
63 | ```
64 |
65 | ```md
66 | NOTE: Environment Creation uses these as the defaults:
67 |
68 | * workspace name : [user supplied]
69 | * subscription: [user supplied]
70 | * resource group: [user supplied]
71 | * pricing tier (Azure Databricks): premium
72 | * location: [eastus]
73 |
74 | * primary storage type: Azure Storage
75 | * selection method: access key
76 | * storage account name: [same value as workspace name]
77 | * storage account key: [key]
78 | * default container: [workspaceName + ##]
79 | ```
80 |
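Optionally, you can confirm from PowerShell that the deployment completed before moving on (a minimal sketch using the same AzureRM cmdlets as the scripts above; the bracketed value is a placeholder for your resource group name):

```PowerShell
# Lists the deployments in the lab resource group with their provisioning state;
# a state of 'Succeeded' indicates the environment is ready to use.
Get-AzureRmResourceGroupDeployment -ResourceGroupName "[newResourceGroupName]" |
    Select-Object DeploymentName, ProvisioningState, Timestamp
```
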
81 | ## 2. Create and Run a new Azure Databricks Cluster
82 |
83 | 1. Using the Azure Portal, navigate to your Azure Databricks workspace.
84 | 1. Within the Overview blade, select **Launch Workspace**.
85 | 1. Once logged in to the Azure Databricks workspace, select **Clusters** on the left-hand menu.
86 | 1. Select **+ Create Cluster**.
87 | 1. Set the following in the cluster creation form:
88 |     1. Cluster Type: Standard
89 |     1. Cluster Name: Provide a unique name, such as Lab
90 |     1. Databricks Runtime Version: Select 4.0 or greater (non-beta versions)
91 |     1. Python Version: 3
92 |     1. Driver Type: Default value
93 |     1. Worker Type: Default value (same for Min Workers and Max Workers)
94 |     1. Auto Termination: Check the box and set to 120 minutes of inactivity
95 |     1. **Important:** Edit the Spark Config by entering the connection information for your Azure Storage account. This will allow your cluster to access the lab files. You will find the init configuration value at the bottom of the PowerShell output after executing the `Deploy-LabEnvironment.ps1` script. The string should look similar to the following: `spark.hadoop.fs.azure.account.key.mydatabrickslab.blob.core.windows.net 8/jR7FXwkajLPObf8OOETzuiJxaIiI6B6z0m8euneUe0DgX/TnGHoywMw25kYdyTk/ap0eZ2PsQhXfE/E5d2Kg==`, where `mydatabrickslab` is your Azure Storage account name (matches the `workspaceName` value), and `8/jR7FXwkajLPObf8OOETzuiJxaIiI6B6z0m8euneUe0DgX/TnGHoywMw25kYdyTk/ap0eZ2PsQhXfE/E5d2Kg==` is your storage access key.
96 | 1. Select **Create Cluster**
97 |
98 | **Note:** Your lab files will be copied to a container created in the storage account with the naming convention {STORAGE-ACCOUNT-NAME}{INDEX}, where the index runs from 0 to `workspaceCount` - 1 (so with a `workspaceCount` of 1, the single container's name ends in 0). If creating multiple workspaces for a classroom environment, be sure to assign a number to each student and have them use the corresponding number appended to the end of their Azure Databricks workspace name, HDInsight Kafka cluster name, and the container name within the lab's Azure Storage account.
99 |
100 | ## 3. Verify the Sample Data Copy
101 |
102 | **Note:** This section only applies if you are skipping Lab 1 (using the `-skipLab1` switch).
103 |
104 | The script creates a storage account and, for each Azure Databricks workspace, a default container within it, then copies the sample data into each container. After the copy has completed, your workspaces will have access to a copy of the retaildata files underneath the path /retaildata in the storage container created for the workspace.
105 |
106 | The retaildata source files are currently available at the following location, accessed with a SAS token:
107 |
108 | **account name**: retaildatasamples
109 |
110 | **container**: data
111 |
112 | **path**: retaildata
113 |
114 | **SAS Key**: ?sv=2017-04-17&ss=b&srt=co&sp=rl&se=2019-12-31T18:29:33Z&st=2017-09-18T10:29:33Z&spr=https&sig=bw1EJflDFx9NuvLRdBGql8RU%2FC9oz92Dz8Xs76cftJM%3D
115 |
116 | Verify the copy has completed by navigating to the destination container using the Azure Portal. In the container list, select the ellipses and then Container properties.
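Alternatively, you can check the copy from PowerShell (a minimal sketch, assuming the same AzureRM and Azure.Storage cmdlets used by the deployment scripts; the bracketed values are placeholders for your storage account name, key, and container):

```PowerShell
# Count the blobs and total size in the destination container,
# then compare the results against the expected values below.
$ctx = New-AzureStorageContext -StorageAccountName "[storageAccountName]" -StorageAccountKey "[storageKey]"
$blobs = Get-AzureStorageBlob -Context $ctx -Container "[containerName]"
"Blob Count: {0}" -f $blobs.Count
"Size: {0:N2} GiB" -f (($blobs | Measure-Object -Property Length -Sum).Sum / 1GB)
```
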
If your output matches the following, all of the files have been copied over: 117 | 118 | Container Size 119 | * Blob Count: 3202 120 | * Size: 12.02 GiB 121 | -------------------------------------------------------------------------------- /Setup/Deployment/HDInsightLabsEnvironment/HDInsightLabsEnvironment/Deploy-AzureResourceGroup.ps1: -------------------------------------------------------------------------------- 1 | #Requires -Version 3.0 2 | 3 | Param( 4 | [string] [Parameter(Mandatory=$true)] $ResourceGroupLocation, 5 | [string] $ResourceGroupName = 'HDInsightLabsEnvironment', 6 | [switch] $UploadArtifacts, 7 | [string] $StorageAccountName, 8 | [string] $StorageContainerName = $ResourceGroupName.ToLowerInvariant() + '-stageartifacts', 9 | [string] $TemplateFile = 'azuredeploy.json', 10 | [string] $TemplateParametersFile = 'azuredeploy.parameters.json', 11 | [string] $ArtifactStagingDirectory = '.', 12 | [string] $DSCSourceFolder = 'DSC', 13 | [switch] $ValidateOnly 14 | ) 15 | 16 | try { 17 | [Microsoft.Azure.Common.Authentication.AzureSession]::ClientFactory.AddUserAgent("VSAzureTools-$UI$($host.name)".replace(' ','_'), '3.0.0') 18 | } catch { } 19 | 20 | $ErrorActionPreference = 'Stop' 21 | Set-StrictMode -Version 3 22 | 23 | function Format-ValidationOutput { 24 | param ($ValidationOutput, [int] $Depth = 0) 25 | Set-StrictMode -Off 26 | return @($ValidationOutput | Where-Object { $_ -ne $null } | ForEach-Object { @(' ' * $Depth + ': ' + $_.Message) + @(Format-ValidationOutput @($_.Details) ($Depth + 1)) }) 27 | } 28 | 29 | $OptionalParameters = New-Object -TypeName Hashtable 30 | $TemplateFile = [System.IO.Path]::GetFullPath([System.IO.Path]::Combine($PSScriptRoot, $TemplateFile)) 31 | $TemplateParametersFile = [System.IO.Path]::GetFullPath([System.IO.Path]::Combine($PSScriptRoot, $TemplateParametersFile)) 32 | 33 | if ($UploadArtifacts) { 34 | # Convert relative paths to absolute paths if needed 35 | $ArtifactStagingDirectory = [System.IO.Path]::GetFullPath([System.IO.Path]::Combine($PSScriptRoot, $ArtifactStagingDirectory)) 36 | $DSCSourceFolder = [System.IO.Path]::GetFullPath([System.IO.Path]::Combine($PSScriptRoot, $DSCSourceFolder)) 37 | 38 | # Parse the parameter file and update the values of artifacts location and artifacts location SAS token if they are present 39 | $JsonParameters = Get-Content $TemplateParametersFile -Raw | ConvertFrom-Json 40 | if (($JsonParameters | Get-Member -Type NoteProperty 'parameters') -ne $null) { 41 | $JsonParameters = $JsonParameters.parameters 42 | } 43 | $ArtifactsLocationName = '_artifactsLocation' 44 | $ArtifactsLocationSasTokenName = '_artifactsLocationSasToken' 45 | $OptionalParameters[$ArtifactsLocationName] = $JsonParameters | Select -Expand $ArtifactsLocationName -ErrorAction Ignore | Select -Expand 'value' -ErrorAction Ignore 46 | $OptionalParameters[$ArtifactsLocationSasTokenName] = $JsonParameters | Select -Expand $ArtifactsLocationSasTokenName -ErrorAction Ignore | Select -Expand 'value' -ErrorAction Ignore 47 | 48 | # Create DSC configuration archive 49 | if (Test-Path $DSCSourceFolder) { 50 | $DSCSourceFilePaths = @(Get-ChildItem $DSCSourceFolder -File -Filter '*.ps1' | ForEach-Object -Process {$_.FullName}) 51 | foreach ($DSCSourceFilePath in $DSCSourceFilePaths) { 52 | $DSCArchiveFilePath = $DSCSourceFilePath.Substring(0, $DSCSourceFilePath.Length - 4) + '.zip' 53 | Publish-AzureRmVMDscConfiguration $DSCSourceFilePath -OutputArchivePath $DSCArchiveFilePath -Force -Verbose 54 | } 55 | } 56 | 57 | # Create a storage account 
name if none was provided 58 | if ($StorageAccountName -eq '') { 59 | $StorageAccountName = 'stage' + ((Get-AzureRmContext).Subscription.SubscriptionId).Replace('-', '').substring(0, 19) 60 | } 61 | 62 | $StorageAccount = (Get-AzureRmStorageAccount | Where-Object{$_.StorageAccountName -eq $StorageAccountName}) 63 | 64 | # Create the storage account if it doesn't already exist 65 | if ($StorageAccount -eq $null) { 66 | $StorageResourceGroupName = 'ARM_Deploy_Staging' 67 | New-AzureRmResourceGroup -Location "$ResourceGroupLocation" -Name $StorageResourceGroupName -Force 68 | $StorageAccount = New-AzureRmStorageAccount -StorageAccountName $StorageAccountName -Type 'Standard_LRS' -ResourceGroupName $StorageResourceGroupName -Location "$ResourceGroupLocation" 69 | } 70 | 71 | # Generate the value for artifacts location if it is not provided in the parameter file 72 | if ($OptionalParameters[$ArtifactsLocationName] -eq $null) { 73 | $OptionalParameters[$ArtifactsLocationName] = $StorageAccount.Context.BlobEndPoint + $StorageContainerName 74 | } 75 | 76 | # Copy files from the local storage staging location to the storage account container 77 | New-AzureStorageContainer -Name $StorageContainerName -Context $StorageAccount.Context -ErrorAction SilentlyContinue *>&1 78 | 79 | $ArtifactFilePaths = Get-ChildItem $ArtifactStagingDirectory -Recurse -File | ForEach-Object -Process {$_.FullName} 80 | foreach ($SourcePath in $ArtifactFilePaths) { 81 | Set-AzureStorageBlobContent -File $SourcePath -Blob $SourcePath.Substring($ArtifactStagingDirectory.length + 1) ` 82 | -Container $StorageContainerName -Context $StorageAccount.Context -Force 83 | } 84 | 85 | # Generate a 4 hour SAS token for the artifacts location if one was not provided in the parameters file 86 | if ($OptionalParameters[$ArtifactsLocationSasTokenName] -eq $null) { 87 | $OptionalParameters[$ArtifactsLocationSasTokenName] = ConvertTo-SecureString -AsPlainText -Force ` 88 | (New-AzureStorageContainerSASToken -Container $StorageContainerName -Context $StorageAccount.Context -Permission r -ExpiryTime (Get-Date).AddHours(4)) 89 | } 90 | } 91 | 92 | # Create or update the resource group using the specified template file and template parameters file 93 | New-AzureRmResourceGroup -Name $ResourceGroupName -Location $ResourceGroupLocation -Verbose -Force 94 | 95 | if ($ValidateOnly) { 96 | $ErrorMessages = Format-ValidationOutput (Test-AzureRmResourceGroupDeployment -ResourceGroupName $ResourceGroupName ` 97 | -TemplateFile $TemplateFile ` 98 | -TemplateParameterFile $TemplateParametersFile ` 99 | @OptionalParameters) 100 | if ($ErrorMessages) { 101 | Write-Output '', 'Validation returned the following errors:', @($ErrorMessages), '', 'Template is invalid.' 102 | } 103 | else { 104 | Write-Output '', 'Template is valid.' 
105 | } 106 | } 107 | else { 108 | New-AzureRmResourceGroupDeployment -Name ((Get-ChildItem $TemplateFile).BaseName + '-' + ((Get-Date).ToUniversalTime()).ToString('MMdd-HHmm')) ` 109 | -ResourceGroupName $ResourceGroupName ` 110 | -TemplateFile $TemplateFile ` 111 | -TemplateParameterFile $TemplateParametersFile ` 112 | @OptionalParameters ` 113 | -Force -Verbose ` 114 | -ErrorVariable ErrorMessages 115 | if ($ErrorMessages) { 116 | Write-Output '', 'Template deployment returned the following errors:', @(@($ErrorMessages) | ForEach-Object { $_.Exception.Message.TrimEnd("`r`n") }) 117 | } 118 | } -------------------------------------------------------------------------------- /Setup/Deployment/HDInsightLabsEnvironment/HDInsightLabsEnvironment/bin/Debug/staging/HDInsightLabsEnvironment/Deploy-AzureResourceGroup.ps1: -------------------------------------------------------------------------------- 1 | #Requires -Version 3.0 2 | 3 | Param( 4 | [string] [Parameter(Mandatory=$true)] $ResourceGroupLocation, 5 | [string] $ResourceGroupName = 'HDInsightLabsEnvironment', 6 | [switch] $UploadArtifacts, 7 | [string] $StorageAccountName, 8 | [string] $StorageContainerName = $ResourceGroupName.ToLowerInvariant() + '-stageartifacts', 9 | [string] $TemplateFile = 'azuredeploy.json', 10 | [string] $TemplateParametersFile = 'azuredeploy.parameters.json', 11 | [string] $ArtifactStagingDirectory = '.', 12 | [string] $DSCSourceFolder = 'DSC', 13 | [switch] $ValidateOnly 14 | ) 15 | 16 | try { 17 | [Microsoft.Azure.Common.Authentication.AzureSession]::ClientFactory.AddUserAgent("VSAzureTools-$UI$($host.name)".replace(' ','_'), '3.0.0') 18 | } catch { } 19 | 20 | $ErrorActionPreference = 'Stop' 21 | Set-StrictMode -Version 3 22 | 23 | function Format-ValidationOutput { 24 | param ($ValidationOutput, [int] $Depth = 0) 25 | Set-StrictMode -Off 26 | return @($ValidationOutput | Where-Object { $_ -ne $null } | ForEach-Object { @(' ' * $Depth + ': ' + $_.Message) + @(Format-ValidationOutput @($_.Details) ($Depth + 1)) }) 27 | } 28 | 29 | $OptionalParameters = New-Object -TypeName Hashtable 30 | $TemplateFile = [System.IO.Path]::GetFullPath([System.IO.Path]::Combine($PSScriptRoot, $TemplateFile)) 31 | $TemplateParametersFile = [System.IO.Path]::GetFullPath([System.IO.Path]::Combine($PSScriptRoot, $TemplateParametersFile)) 32 | 33 | if ($UploadArtifacts) { 34 | # Convert relative paths to absolute paths if needed 35 | $ArtifactStagingDirectory = [System.IO.Path]::GetFullPath([System.IO.Path]::Combine($PSScriptRoot, $ArtifactStagingDirectory)) 36 | $DSCSourceFolder = [System.IO.Path]::GetFullPath([System.IO.Path]::Combine($PSScriptRoot, $DSCSourceFolder)) 37 | 38 | # Parse the parameter file and update the values of artifacts location and artifacts location SAS token if they are present 39 | $JsonParameters = Get-Content $TemplateParametersFile -Raw | ConvertFrom-Json 40 | if (($JsonParameters | Get-Member -Type NoteProperty 'parameters') -ne $null) { 41 | $JsonParameters = $JsonParameters.parameters 42 | } 43 | $ArtifactsLocationName = '_artifactsLocation' 44 | $ArtifactsLocationSasTokenName = '_artifactsLocationSasToken' 45 | $OptionalParameters[$ArtifactsLocationName] = $JsonParameters | Select -Expand $ArtifactsLocationName -ErrorAction Ignore | Select -Expand 'value' -ErrorAction Ignore 46 | $OptionalParameters[$ArtifactsLocationSasTokenName] = $JsonParameters | Select -Expand $ArtifactsLocationSasTokenName -ErrorAction Ignore | Select -Expand 'value' -ErrorAction Ignore 47 | 48 | # Create DSC configuration archive 
49 | if (Test-Path $DSCSourceFolder) { 50 | $DSCSourceFilePaths = @(Get-ChildItem $DSCSourceFolder -File -Filter '*.ps1' | ForEach-Object -Process {$_.FullName}) 51 | foreach ($DSCSourceFilePath in $DSCSourceFilePaths) { 52 | $DSCArchiveFilePath = $DSCSourceFilePath.Substring(0, $DSCSourceFilePath.Length - 4) + '.zip' 53 | Publish-AzureRmVMDscConfiguration $DSCSourceFilePath -OutputArchivePath $DSCArchiveFilePath -Force -Verbose 54 | } 55 | } 56 | 57 | # Create a storage account name if none was provided 58 | if ($StorageAccountName -eq '') { 59 | $StorageAccountName = 'stage' + ((Get-AzureRmContext).Subscription.SubscriptionId).Replace('-', '').substring(0, 19) 60 | } 61 | 62 | $StorageAccount = (Get-AzureRmStorageAccount | Where-Object{$_.StorageAccountName -eq $StorageAccountName}) 63 | 64 | # Create the storage account if it doesn't already exist 65 | if ($StorageAccount -eq $null) { 66 | $StorageResourceGroupName = 'ARM_Deploy_Staging' 67 | New-AzureRmResourceGroup -Location "$ResourceGroupLocation" -Name $StorageResourceGroupName -Force 68 | $StorageAccount = New-AzureRmStorageAccount -StorageAccountName $StorageAccountName -Type 'Standard_LRS' -ResourceGroupName $StorageResourceGroupName -Location "$ResourceGroupLocation" 69 | } 70 | 71 | # Generate the value for artifacts location if it is not provided in the parameter file 72 | if ($OptionalParameters[$ArtifactsLocationName] -eq $null) { 73 | $OptionalParameters[$ArtifactsLocationName] = $StorageAccount.Context.BlobEndPoint + $StorageContainerName 74 | } 75 | 76 | # Copy files from the local storage staging location to the storage account container 77 | New-AzureStorageContainer -Name $StorageContainerName -Context $StorageAccount.Context -ErrorAction SilentlyContinue *>&1 78 | 79 | $ArtifactFilePaths = Get-ChildItem $ArtifactStagingDirectory -Recurse -File | ForEach-Object -Process {$_.FullName} 80 | foreach ($SourcePath in $ArtifactFilePaths) { 81 | Set-AzureStorageBlobContent -File $SourcePath -Blob $SourcePath.Substring($ArtifactStagingDirectory.length + 1) ` 82 | -Container $StorageContainerName -Context $StorageAccount.Context -Force 83 | } 84 | 85 | # Generate a 4 hour SAS token for the artifacts location if one was not provided in the parameters file 86 | if ($OptionalParameters[$ArtifactsLocationSasTokenName] -eq $null) { 87 | $OptionalParameters[$ArtifactsLocationSasTokenName] = ConvertTo-SecureString -AsPlainText -Force ` 88 | (New-AzureStorageContainerSASToken -Container $StorageContainerName -Context $StorageAccount.Context -Permission r -ExpiryTime (Get-Date).AddHours(4)) 89 | } 90 | } 91 | 92 | # Create or update the resource group using the specified template file and template parameters file 93 | New-AzureRmResourceGroup -Name $ResourceGroupName -Location $ResourceGroupLocation -Verbose -Force 94 | 95 | if ($ValidateOnly) { 96 | $ErrorMessages = Format-ValidationOutput (Test-AzureRmResourceGroupDeployment -ResourceGroupName $ResourceGroupName ` 97 | -TemplateFile $TemplateFile ` 98 | -TemplateParameterFile $TemplateParametersFile ` 99 | @OptionalParameters) 100 | if ($ErrorMessages) { 101 | Write-Output '', 'Validation returned the following errors:', @($ErrorMessages), '', 'Template is invalid.' 102 | } 103 | else { 104 | Write-Output '', 'Template is valid.' 
105 | } 106 | } 107 | else { 108 | New-AzureRmResourceGroupDeployment -Name ((Get-ChildItem $TemplateFile).BaseName + '-' + ((Get-Date).ToUniversalTime()).ToString('MMdd-HHmm')) ` 109 | -ResourceGroupName $ResourceGroupName ` 110 | -TemplateFile $TemplateFile ` 111 | -TemplateParameterFile $TemplateParametersFile ` 112 | @OptionalParameters ` 113 | -Force -Verbose ` 114 | -ErrorVariable ErrorMessages 115 | if ($ErrorMessages) { 116 | Write-Output '', 'Template deployment returned the following errors:', @(@($ErrorMessages) | ForEach-Object { $_.Exception.Message.TrimEnd("`r`n") }) 117 | } 118 | } -------------------------------------------------------------------------------- /Setup/Scripts/azuredeploy.all.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": 3 | "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", 4 | "contentVersion": "1.0.0.0", 5 | "parameters": { 6 | "clusterAttachedStorageType": { 7 | "type": "string", 8 | "defaultValue": "Standard_LRS", 9 | "allowedValues": [ 10 | "Standard_LRS", 11 | "Standard_ZRS", 12 | "Standard_GRS", 13 | "Standard_RAGRS", 14 | "Premium_LRS" 15 | ] 16 | }, 17 | "skipLab1": { 18 | "type": "string", 19 | "defaultValue": "Yes", 20 | "allowedValues": ["Yes", "No"], 21 | "metadata": { 22 | "description": 23 | "Specifies whether to skip the first lab. If so, lab files are copied automatically and ADF is not provisioned." 24 | } 25 | }, 26 | "workspaceName": { 27 | "type": "string", 28 | "metadata": { 29 | "description": "The name of the Azure Databricks workspace to create." 30 | } 31 | }, 32 | "pricingTier": { 33 | "type": "string", 34 | "defaultValue": "premium", 35 | "allowedValues": ["standard", "premium"], 36 | "metadata": { 37 | "description": "The pricing tier of Databricks workspace." 38 | } 39 | }, 40 | "location": { 41 | "type": "string", 42 | "defaultValue": "westus2", 43 | "metadata": { 44 | "description": 45 | "The location where all azure resources will be deployed." 46 | } 47 | }, 48 | "workspaceCount": { 49 | "type": "int", 50 | "defaultValue": 1, 51 | "metadata": { 52 | "description": "The number of Databricks workspaces to deploy." 
53 | } 54 | } 55 | }, 56 | "variables": { 57 | "cleanResourcePrefix": 58 | "[replace(replace(replace(toLower(parameters('workspaceName')), '-', ''), '_', ''), '.', '')]", 59 | "clusterAttachedStorageName": "[parameters('workspaceName')]", 60 | "dataFactoryName": 61 | "[concat(variables('cleanResourcePrefix'), 'adf', uniqueString(resourceGroup().id))]", 62 | "factoryId": 63 | "[concat('Microsoft.DataFactory/factories/', variables('dataFactoryName'))]", 64 | "RetailData_BlobStorage_SasUri": 65 | "https://retaildatasamples.blob.core.windows.net/data?sv=2017-07-29&sr=c&sig=0%2FdYdx%2FW1X8EbO6GpH0R4ZEBrAUkYWqk2uz%2Fym5w3Gg%3D&st=2018-04-01T00%3A00%3A00Z&se=2099-12-31T12%3A59%3A59Z&sp=rl", 66 | "retailData_folderPath": "data/retaildata/rawdata/", 67 | "retailData_weblogs_folderPath": 68 | "[concat(variables('retailData_folderPath'), 'weblognew')]", 69 | "retailData_users_folderPath": 70 | "[concat(variables('retailData_folderPath'), 'UserFile')]", 71 | "retailData_products_folderPath": 72 | "[concat(variables('retailData_folderPath'), 'ProductFile')]", 73 | "retailDataDestination_folderPath": "retaildata/rawdata/", 74 | "weblogsDestinationPath": 75 | "[concat(variables('retailDataDestination_folderPath'), 'weblognew')]", 76 | "usersDestinationPath": 77 | "[concat(variables('retailDataDestination_folderPath'), 'UserFile')]", 78 | "productsDestinationPath": 79 | "[concat(variables('retailDataDestination_folderPath'), 'ProductFile')]", 80 | "fileName": "part*.csv" 81 | }, 82 | "resources": [ 83 | { 84 | "name": "[variables('clusterAttachedStorageName')]", 85 | "type": "Microsoft.Storage/storageAccounts", 86 | "location": "[parameters('location')]", 87 | "apiVersion": "2016-01-01", 88 | "sku": { 89 | "name": "[parameters('clusterAttachedStorageType')]" 90 | }, 91 | "dependsOn": [], 92 | "tags": { 93 | "displayName": "clusterattachedstorage" 94 | }, 95 | "kind": "Storage" 96 | }, 97 | // Azure Data Factory 98 | { 99 | "name": "[concat(variables('dataFactoryName'), copyIndex())]", 100 | "condition": "[equals(parameters('skipLab1'),'No')]", 101 | "type": "Microsoft.DataFactory/factories", 102 | "location": "[resourceGroup().location]", 103 | "apiVersion": "2017-09-01-preview", 104 | "copy": { 105 | "name": "adfcopy", 106 | "count": "[parameters('workspaceCount')]" 107 | }, 108 | "identity": { 109 | "type": "SystemAssigned" 110 | }, 111 | "properties": {} 112 | }, 113 | { 114 | "name": 115 | "[concat(variables('dataFactoryName'), copyIndex(), '/Pipeline_CopyRetailDataToStorageAccount')]", 116 | "condition": "[equals(parameters('skipLab1'),'No')]", 117 | "type": "Microsoft.DataFactory/factories/pipelines", 118 | "apiVersion": "2017-09-01-preview", 119 | "copy": { 120 | "name": "adfpipelinecopyretaildatacopy", 121 | "count": "[parameters('workspaceCount')]" 122 | }, 123 | "properties": { 124 | "description": 125 | "Copies files from the retaildata public storage account to your storage account", 126 | "activities": [ 127 | { 128 | "name": "Copy_weblogs", 129 | "type": "Copy", 130 | "dependsOn": [], 131 | "policy": { 132 | "timeout": "7.00:00:00", 133 | "retry": 0, 134 | "retryIntervalInSeconds": 30 135 | }, 136 | "typeProperties": { 137 | "source": { 138 | "type": "BlobSource", 139 | "recursive": true 140 | }, 141 | "sink": { 142 | "type": "BlobSink", 143 | "copyBehavior": "PreserveHierarchy" 144 | }, 145 | "enableStaging": false, 146 | "cloudDataMovementUnits": 0, 147 | "enableSkipIncompatibleRow": true 148 | }, 149 | "inputs": [ 150 | { 151 | "referenceName": "retaildata_weblogs", 152 | "type": 
"DatasetReference", 153 | "parameters": {} 154 | } 155 | ], 156 | "outputs": [ 157 | { 158 | "referenceName": "weblogs_output", 159 | "type": "DatasetReference", 160 | "parameters": {} 161 | } 162 | ] 163 | }, 164 | { 165 | "name": "Copy_users", 166 | "type": "Copy", 167 | "dependsOn": [], 168 | "policy": { 169 | "timeout": "7.00:00:00", 170 | "retry": 0, 171 | "retryIntervalInSeconds": 30, 172 | "secureOutput": false 173 | }, 174 | "typeProperties": { 175 | "source": { 176 | "type": "BlobSource", 177 | "recursive": true 178 | }, 179 | "sink": { 180 | "type": "BlobSink", 181 | "copyBehavior": "PreserveHierarchy" 182 | }, 183 | "enableStaging": false, 184 | "cloudDataMovementUnits": 0, 185 | "enableSkipIncompatibleRow": true 186 | }, 187 | "inputs": [ 188 | { 189 | "referenceName": "retaildata_users", 190 | "type": "DatasetReference", 191 | "parameters": {} 192 | } 193 | ], 194 | "outputs": [ 195 | { 196 | "referenceName": "users_output", 197 | "type": "DatasetReference", 198 | "parameters": {} 199 | } 200 | ] 201 | }, 202 | { 203 | "name": "Copy_products", 204 | "type": "Copy", 205 | "dependsOn": [], 206 | "policy": { 207 | "timeout": "7.00:00:00", 208 | "retry": 0, 209 | "retryIntervalInSeconds": 30, 210 | "secureOutput": false 211 | }, 212 | "typeProperties": { 213 | "source": { 214 | "type": "BlobSource", 215 | "recursive": true 216 | }, 217 | "sink": { 218 | "type": "BlobSink" 219 | }, 220 | "enableStaging": false, 221 | "cloudDataMovementUnits": 0, 222 | "enableSkipIncompatibleRow": true 223 | }, 224 | "inputs": [ 225 | { 226 | "referenceName": "retaildata_products", 227 | "type": "DatasetReference", 228 | "parameters": {} 229 | } 230 | ], 231 | "outputs": [ 232 | { 233 | "referenceName": "products_output", 234 | "type": "DatasetReference", 235 | "parameters": {} 236 | } 237 | ] 238 | } 239 | ] 240 | }, 241 | "dependsOn": [ 242 | "[concat(variables('factoryId'), copyIndex(), '/datasets/retaildata_weblogs')]", 243 | "[concat(variables('factoryId'), copyIndex(), '/datasets/weblogs_output')]", 244 | "[concat(variables('factoryId'), copyIndex(), '/datasets/retaildata_users')]", 245 | "[concat(variables('factoryId'), copyIndex(), '/datasets/users_output')]", 246 | "[concat(variables('factoryId'), copyIndex(), '/datasets/retaildata_products')]", 247 | "[concat(variables('factoryId'), copyIndex(), '/datasets/products_output')]" 248 | ] 249 | }, 250 | { 251 | "name": 252 | "[concat(variables('dataFactoryName'), copyIndex(), '/retaildata_weblogs')]", 253 | "condition": "[equals(parameters('skipLab1'),'No')]", 254 | "type": "Microsoft.DataFactory/factories/datasets", 255 | "apiVersion": "2017-09-01-preview", 256 | "copy": { 257 | "name": "adfretaildataweblogscopy", 258 | "count": "[parameters('workspaceCount')]" 259 | }, 260 | "properties": { 261 | "linkedServiceName": { 262 | "referenceName": "RetailData_BlobStorage", 263 | "type": "LinkedServiceReference" 264 | }, 265 | "type": "AzureBlob", 266 | "structure": [ 267 | { 268 | "name": "UserId", 269 | "type": "Int64" 270 | }, 271 | { 272 | "name": "SessionId", 273 | "type": "String" 274 | }, 275 | { 276 | "name": "ProductId", 277 | "type": "Int64" 278 | }, 279 | { 280 | "name": "Quantity", 281 | "type": "Int64" 282 | }, 283 | { 284 | "name": "Price", 285 | "type": "Double" 286 | }, 287 | { 288 | "name": "TotalPrice", 289 | "type": "Double" 290 | }, 291 | { 292 | "name": "ReferralURL", 293 | "type": "String" 294 | }, 295 | { 296 | "name": "PageStopDuration", 297 | "type": "Int64" 298 | }, 299 | { 300 | "name": "Action", 301 | "type": "String" 302 
| }, 303 | { 304 | "name": "TransactionDate", 305 | "type": "DateTime" 306 | } 307 | ], 308 | "typeProperties": { 309 | "format": { 310 | "type": "TextFormat", 311 | "columnDelimiter": "|", 312 | "treatEmptyAsNull": true, 313 | "firstRowAsHeader": true 314 | }, 315 | "folderPath": "[variables('retaildata_weblogs_folderPath')]" 316 | } 317 | }, 318 | "dependsOn": [ 319 | "[concat(variables('factoryId'), copyIndex(), '/linkedServices/RetailData_BlobStorage')]" 320 | ] 321 | }, 322 | { 323 | "name": 324 | "[concat(variables('dataFactoryName'), copyIndex(), '/weblogs_output')]", 325 | "condition": "[equals(parameters('skipLab1'),'No')]", 326 | "type": "Microsoft.DataFactory/factories/datasets", 327 | "apiVersion": "2017-09-01-preview", 328 | "copy": { 329 | "name": "adfweblogsoutputcopy", 330 | "count": "[parameters('workspaceCount')]" 331 | }, 332 | "properties": { 333 | "linkedServiceName": { 334 | "referenceName": "Destination_BlobStorage", 335 | "type": "LinkedServiceReference" 336 | }, 337 | "type": "AzureBlob", 338 | "structure": [ 339 | { 340 | "name": "UserId", 341 | "type": "Int64" 342 | }, 343 | { 344 | "name": "SessionId", 345 | "type": "String" 346 | }, 347 | { 348 | "name": "ProductId", 349 | "type": "Int64" 350 | }, 351 | { 352 | "name": "Quantity", 353 | "type": "Int64" 354 | }, 355 | { 356 | "name": "Price", 357 | "type": "Double" 358 | }, 359 | { 360 | "name": "TotalPrice", 361 | "type": "Double" 362 | }, 363 | { 364 | "name": "ReferralURL", 365 | "type": "String" 366 | }, 367 | { 368 | "name": "PageStopDuration", 369 | "type": "Int64" 370 | }, 371 | { 372 | "name": "Action", 373 | "type": "String" 374 | }, 375 | { 376 | "name": "TransactionDate", 377 | "type": "DateTime" 378 | } 379 | ], 380 | "typeProperties": { 381 | "format": { 382 | "type": "TextFormat", 383 | "columnDelimiter": "|", 384 | "treatEmptyAsNull": true, 385 | "firstRowAsHeader": true 386 | }, 387 | "folderPath": 388 | "[concat(variables('clusterAttachedStorageName') ,copyIndex(), '/', variables('weblogsDestinationPath'))]" 389 | } 390 | }, 391 | "dependsOn": [ 392 | "[concat(variables('factoryId'), copyIndex(), '/linkedServices/Destination_BlobStorage')]" 393 | ] 394 | }, 395 | { 396 | "name": 397 | "[concat(variables('dataFactoryName'), copyIndex(), '/retaildata_users')]", 398 | "condition": "[equals(parameters('skipLab1'),'No')]", 399 | "type": "Microsoft.DataFactory/factories/datasets", 400 | "apiVersion": "2017-09-01-preview", 401 | "copy": { 402 | "name": "adfretaildatauserscopy", 403 | "count": "[parameters('workspaceCount')]" 404 | }, 405 | "properties": { 406 | "linkedServiceName": { 407 | "referenceName": "RetailData_BlobStorage", 408 | "type": "LinkedServiceReference" 409 | }, 410 | "type": "AzureBlob", 411 | "typeProperties": { 412 | "format": { 413 | "type": "TextFormat", 414 | "columnDelimiter": ",", 415 | "rowDelimiter": "", 416 | "nullValue": "\\N", 417 | "treatEmptyAsNull": true, 418 | "firstRowAsHeader": false 419 | }, 420 | "fileName": "[variables('fileName')]", 421 | "folderPath": "[variables('retaildata_users_folderPath')]" 422 | } 423 | }, 424 | "dependsOn": [ 425 | "[concat(variables('factoryId'), copyIndex(), '/linkedServices/RetailData_BlobStorage')]" 426 | ] 427 | }, 428 | { 429 | "name": 430 | "[concat(variables('dataFactoryName'), copyIndex(), '/users_output')]", 431 | "condition": "[equals(parameters('skipLab1'),'No')]", 432 | "type": "Microsoft.DataFactory/factories/datasets", 433 | "apiVersion": "2017-09-01-preview", 434 | "copy": { 435 | "name": "adfusersoutputcopy", 436 | 
"count": "[parameters('workspaceCount')]" 437 | }, 438 | "properties": { 439 | "linkedServiceName": { 440 | "referenceName": "Destination_BlobStorage", 441 | "type": "LinkedServiceReference" 442 | }, 443 | "type": "AzureBlob", 444 | "typeProperties": { 445 | "format": { 446 | "type": "TextFormat", 447 | "columnDelimiter": ",", 448 | "rowDelimiter": "", 449 | "nullValue": "\\N", 450 | "treatEmptyAsNull": true, 451 | "firstRowAsHeader": false 452 | }, 453 | "folderPath": 454 | "[concat(variables('clusterAttachedStorageName') ,copyIndex(), '/', variables('usersDestinationPath'))]" 455 | } 456 | }, 457 | "dependsOn": [ 458 | "[concat(variables('factoryId'), copyIndex(), '/linkedServices/Destination_BlobStorage')]" 459 | ] 460 | }, 461 | { 462 | "name": 463 | "[concat(variables('dataFactoryName'), copyIndex(), '/retaildata_products')]", 464 | "condition": "[equals(parameters('skipLab1'),'No')]", 465 | "type": "Microsoft.DataFactory/factories/datasets", 466 | "apiVersion": "2017-09-01-preview", 467 | "copy": { 468 | "name": "adfretaildataproductscopy", 469 | "count": "[parameters('workspaceCount')]" 470 | }, 471 | "properties": { 472 | "linkedServiceName": { 473 | "referenceName": "RetailData_BlobStorage", 474 | "type": "LinkedServiceReference" 475 | }, 476 | "type": "AzureBlob", 477 | "typeProperties": { 478 | "format": { 479 | "type": "TextFormat", 480 | "columnDelimiter": ",", 481 | "rowDelimiter": "", 482 | "nullValue": "\\N", 483 | "treatEmptyAsNull": true, 484 | "firstRowAsHeader": false 485 | }, 486 | "fileName": "[variables('fileName')]", 487 | "folderPath": "[variables('retaildata_products_folderPath')]" 488 | } 489 | }, 490 | "dependsOn": [ 491 | "[concat(variables('factoryId'), copyIndex(), '/linkedServices/RetailData_BlobStorage')]" 492 | ] 493 | }, 494 | { 495 | "name": 496 | "[concat(variables('dataFactoryName'), copyIndex(), '/products_output')]", 497 | "condition": "[equals(parameters('skipLab1'),'No')]", 498 | "type": "Microsoft.DataFactory/factories/datasets", 499 | "apiVersion": "2017-09-01-preview", 500 | "copy": { 501 | "name": "productsoutputcopy", 502 | "count": "[parameters('workspaceCount')]" 503 | }, 504 | "properties": { 505 | "linkedServiceName": { 506 | "referenceName": "Destination_BlobStorage", 507 | "type": "LinkedServiceReference" 508 | }, 509 | "type": "AzureBlob", 510 | "typeProperties": { 511 | "format": { 512 | "type": "TextFormat", 513 | "columnDelimiter": ",", 514 | "rowDelimiter": "", 515 | "nullValue": "\\N", 516 | "treatEmptyAsNull": true, 517 | "firstRowAsHeader": false 518 | }, 519 | "folderPath": 520 | "[concat(variables('clusterAttachedStorageName') ,copyIndex(), '/', variables('productsDestinationPath'))]" 521 | } 522 | }, 523 | "dependsOn": [ 524 | "[concat(variables('factoryId'), copyIndex(), '/linkedServices/Destination_BlobStorage')]" 525 | ] 526 | }, 527 | { 528 | "name": 529 | "[concat(variables('dataFactoryName'), copyIndex(), '/RetailData_BlobStorage')]", 530 | "condition": "[equals(parameters('skipLab1'),'No')]", 531 | "type": "Microsoft.DataFactory/factories/linkedServices", 532 | "apiVersion": "2017-09-01-preview", 533 | "copy": { 534 | "name": "adflinkedsourceblobcopy", 535 | "count": "[parameters('workspaceCount')]" 536 | }, 537 | "properties": { 538 | "type": "AzureStorage", 539 | "typeProperties": { 540 | "sasUri": { 541 | "type": "SecureString", 542 | "value": "[variables('RetailData_BlobStorage_SasUri')]" 543 | } 544 | } 545 | }, 546 | "dependsOn": ["[concat(variables('factoryId'), copyIndex())]"] 547 | }, 548 | { 549 | "name": 550 
| "[concat(variables('dataFactoryName'), copyIndex(), '/Destination_BlobStorage')]", 551 | "condition": "[equals(parameters('skipLab1'),'No')]", 552 | "type": "Microsoft.DataFactory/factories/linkedServices", 553 | "apiVersion": "2017-09-01-preview", 554 | "copy": { 555 | "name": "adflinkeddestinationblobcopy", 556 | "count": "[parameters('workspaceCount')]" 557 | }, 558 | "properties": { 559 | "type": "AzureStorage", 560 | "typeProperties": { 561 | "connectionString": { 562 | "type": "SecureString", 563 | "value": 564 | "[concat('DefaultEndpointsProtocol=https;AccountName=',variables('clusterAttachedStorageName'),';AccountKey=',listKeys(resourceId('Microsoft.Storage/storageAccounts', variables('clusterAttachedStorageName')), providers('Microsoft.Storage', 'storageAccounts').apiVersions[0]).keys[0].value)]" 565 | } 566 | } 567 | }, 568 | "dependsOn": [ 569 | "[concat('Microsoft.Storage/storageAccounts/',variables('clusterAttachedStorageName'))]", 570 | "[concat(variables('factoryId'), copyIndex())]" 571 | ] 572 | }, 573 | // Azure Databricks workspace 574 | { 575 | "apiVersion": "2018-04-01", 576 | "name": "[concat(parameters('workspaceName'), copyIndex())]", 577 | "copy": { 578 | "name": "databrickscopy", 579 | "count": "[parameters('workspaceCount')]" 580 | }, 581 | "type": "Microsoft.Databricks/workspaces", 582 | "location": "[parameters('location')]", 583 | "dependsOn": [ 584 | "[resourceId('Microsoft.Storage/storageAccounts', variables('clusterAttachedStorageName'))]" 585 | ], 586 | "sku": { 587 | "name": "[parameters('pricingTier')]" 588 | }, 589 | "properties": { 590 | "ManagedResourceGroupId": 591 | "[concat(subscription().id, '/resourceGroups/', 'databricks-rg-', parameters('workspaceName'), copyIndex(), '-', uniqueString(parameters('workspaceName'), resourceGroup().id))]" 592 | } 593 | } 594 | ], 595 | "outputs": {} 596 | } 597 | -------------------------------------------------------------------------------- /Setup/Template/.vs/config/applicationhost.config: -------------------------------------------------------------------------------- 1 | 2 | 20 | 21 | 22 | 49 | 50 | 51 |
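One expression in the azuredeploy.all.json variables block above is worth unpacking: cleanResourcePrefix lowercases the workspace name and strips '-', '_', and '.' through nested replace() calls before the result is concatenated into the Data Factory name, keeping the derived name within Azure's naming rules. A rough PowerShell equivalent of that template expression, with a hypothetical input value:

    # Equivalent of [replace(replace(replace(toLower(parameters('workspaceName')), '-', ''), '_', ''), '.', '')]
    $workspaceName = 'My-Lab_Workspace.01'   # hypothetical value
    $cleanResourcePrefix = $workspaceName.ToLower().Replace('-', '').Replace('_', '').Replace('.', '')
    $cleanResourcePrefix                      # -> mylabworkspace01

Note that clusterAttachedStorageName, by contrast, uses the raw workspaceName directly, so the workspace name supplied to the template must itself already be a valid storage account name (3-24 lowercase letters and digits).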