├── .gitignore ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README.md ├── SECURITY.md ├── SUPPORT.md ├── Synapse ├── lab1 │ ├── lab1-sqlpool-perf-optimization-part-1.md │ ├── lab1-sqlpool-perf-optimization-part-2.md │ ├── media │ │ ├── lab3_materialized_view_rebuilt.png │ │ ├── lab3_materialized_view_updated.png │ │ ├── lab3_ordered_cci.png │ │ ├── lab3_ordered_cci_2.png │ │ ├── lab3_query_id.png │ │ ├── lab3_result_set_cache_build.png │ │ ├── lab3_result_set_cache_off.png │ │ ├── lab3_result_set_cache_size.png │ │ ├── lab3_result_set_caching_db.png │ │ ├── lab3_shuffle_move.png │ │ ├── lab3_shuffle_move_2.png │ │ ├── lab3_shuffle_move_3.png │ │ ├── lab3_shuffle_move_4.png │ │ ├── lab3_statistics_automated.png │ │ ├── lab3_statistics_customerid.png │ │ ├── lab3_table_space_usage.png │ │ ├── lab4_column_store_row_groups.png │ │ ├── lab4_column_store_row_groups_2.png │ │ ├── lab4_data_skew_1.png │ │ ├── lab4_data_skew_2.png │ │ ├── lab4_data_type_selection.png │ │ ├── lab4_table_space.png │ │ ├── lab4_transaction_items_count_distribution.png │ │ ├── manage-hub.png │ │ └── resume-sql-pool.png │ └── readme.md ├── lab2 │ ├── lab2-data-integration.md │ ├── media │ │ ├── attach-spark-pool.png │ │ ├── copy-pipeline-trigger-now.png │ │ ├── cosmos-dataset-preview-data-link.png │ │ ├── cosmos-db-dataset-preview-data.png │ │ ├── cosmos-db-dataset-schema.png │ │ ├── create-adls-dataset.png │ │ ├── create-cosmos-db-dataset.png │ │ ├── create-cosmos-db-linked-service-step1.png │ │ ├── create-cosmos-db-linked-service.png │ │ ├── daily-sales-counts-chart.png │ │ ├── data-hub-parquet-select-rows.png │ │ ├── data-hub.png │ │ ├── develop-hub.png │ │ ├── getting-started.png │ │ ├── manage-hub.png │ │ ├── monitor-hub.png │ │ ├── new-cell.png │ │ ├── new-dataset.png │ │ ├── new-pipeline.png │ │ ├── new-spark-notebook-sales.png │ │ ├── orchestrate-hub.png │ │ ├── pipeline-copy-sales-drag-copy-data.png │ │ ├── pipeline-copy-sales-name.png │ │ ├── pipeline-copy-sales-pipeline-run.png │ │ ├── pipeline-copy-sales-settings.png │ │ ├── pipeline-copy-sales-sink-dataset.png │ │ ├── pipeline-copy-sales-sink-mapping.png │ │ ├── pipeline-copy-sales-sink-settings.png │ │ ├── pipeline-copy-sales-source-dataset.png │ │ ├── publish-all-1.png │ │ ├── rename-publish-sql-script.png │ │ ├── resume-sql-pool.png │ │ ├── select-workspace.png │ │ ├── sort-desc-error.png │ │ ├── spark-explode-output.png │ │ ├── spark-grouped-top-purchases-total-items.png │ │ ├── spark-grouped-top-purchases.png │ │ ├── spark-json-output-nested.png │ │ ├── spark-toppurchases-output.png │ │ ├── sql-on-demand-selected.png │ │ ├── sql-serverless-aggregates.png │ │ ├── sqlpool01-import01-linked-service.png │ │ ├── synapse-studio-new-sql-script.png │ │ ├── synapse-studio-query-toolbar-connect.png │ │ ├── synapse-studio-query-toolbar-run.png │ │ └── workload-classifiers-query-results.png │ └── readme.md ├── readme.md └── setup │ ├── automation │ ├── 00-asa-workspace-core.json │ └── 01-environment-setup.ps1 │ ├── datasets │ ├── customer_profile_cosmosdb.json │ ├── wwi02_date_adls.json │ ├── wwi02_date_asa.json │ ├── wwi02_online_user_profiles_01_adal.json │ ├── wwi02_poc_customer_adls.json │ ├── wwi02_poc_customer_asa.json │ ├── wwi02_product_adls.json │ ├── wwi02_product_asa.json │ ├── wwi02_sale_small_adls.json │ ├── wwi02_sale_small_asa.json │ ├── wwi02_sale_small_workload_01_asa.json │ └── wwi02_sale_small_workload_02_asa.json │ ├── media │ ├── asaworkspace-deploy-configure.png │ ├── asaworkspace-deploy-progress.png │ ├── asaworkspace-deploy-tag.png │ ├── cloudshell-configure-01.png │ ├── cloudshell-configure-02.png │ ├── cloudshell-configure-03.png │ ├── cloudshell-configure-04.png │ ├── cloudshell-setup-01.png │ ├── cloudshell-setup-02.png │ ├── cloudshell-setup-03.png │ └── cloudshell-setup-04.png │ ├── pipelines │ ├── execute_business_analyst_queries.json │ ├── execute_data_analyst_and_ceo_queries.json │ ├── import_customer_profiles_into_cosmosdb.json │ ├── import_poc_customer_data.json │ └── load_sql_pool_from_data_lake.json │ ├── readme.md │ ├── solliance-synapse-automation │ └── solliance-synapse-automation.psm1 │ ├── sql │ ├── 01-create-logins.sql │ ├── 02-create-users.sql │ ├── 03-create-schemas.sql │ ├── 04-create-tables-in-wwi-schema.sql │ ├── 05-create-tables-in-wwi-ml-schema-delta.sql │ ├── 05-create-tables-in-wwi-ml-schema.sql │ ├── 06-create-tables-in-wwi-security-schema.sql │ ├── 07-create-wwi-perf-sale-heap.sql │ ├── 08-create-wwi-perf-sale-partition01.sql │ ├── 09-create-wwi-perf-sale-partition02.sql │ ├── 10-create-wwi-perf-sale-index.sql │ ├── 11-create-wwi-perf-sale-hash-ordered.sql │ ├── 12-create-wwi-perf-sale-hash-projection.sql │ ├── 13-create-wwi-perf-sale-hash-projection2.sql │ ├── 14-create-wwi-perf-sale-hash-projection-big.sql │ ├── 15-create-wwi-perf-sale-hash-projection-big2.sql │ ├── 16-create-poc-schema.sql │ ├── 17-create-wwi-poc-sale-heap.sql │ ├── 18-get-sql-pool-artifacts.sql │ ├── 19-cleanup-sql-pool.sql │ └── _test.json │ └── templates │ ├── blob_storage_linked_service.json │ ├── cosmos_db_linked_service.json │ ├── data_lake_linked_service.json │ ├── integration_runtime.json │ ├── key_vault_linked_service.json │ ├── spark_notebook.json │ ├── spark_notebook_session.json │ ├── sql_pool_key_vault_linked_service.json │ ├── sql_pool_script.json │ └── sql_script.json └── readme /.gitignore: -------------------------------------------------------------------------------- 1 | Synapse/setup/automation/azcopy_windows_amd64_10.11.0/NOTICE.txt 2 | Synapse/setup/automation/azCopy.zip 3 | Synapse/setup/automation/azcopy_windows_amd64_10.11.0/azcopy.exe 4 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Project 2 | 3 | > This repo has been populated by an initial template to help get you started. Please 4 | > make sure to update the content to build a great experience for community-building. 5 | 6 | As the maintainer of this project, please make a few updates: 7 | 8 | - Improving this README.MD file to provide a great experience 9 | - Updating SUPPORT.MD with content about this project's support experience 10 | - Understanding the security reporting process in SECURITY.MD 11 | - Remove this section from the README 12 | 13 | ## Contributing 14 | 15 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 16 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 17 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. 18 | 19 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide 20 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions 21 | provided by the bot. You will only need to do this once across all repos using our CLA. 22 | 23 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 24 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 25 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 26 | 27 | ## Trademarks 28 | 29 | This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft 30 | trademarks or logos is subject to and must follow 31 | [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). 32 | Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. 33 | Any use of third-party trademarks or logos are subject to those third-party's policies. 34 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). 40 | 41 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # TODO: The maintainer of this repo has not yet edited this file 2 | 3 | **REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project? 4 | 5 | - **No CSS support:** Fill out this template with information about how to file issues and get help. 6 | - **Yes CSS support:** Fill out an intake form at [aka.ms/spot](https://aka.ms/spot). CSS will work with/help you to determine next steps. More details also available at [aka.ms/onboardsupport](https://aka.ms/onboardsupport). 7 | - **Not sure?** Fill out a SPOT intake as though the answer were "Yes". CSS will help you decide. 8 | 9 | *Then remove this first heading from this SUPPORT.MD file before publishing your repo.* 10 | 11 | # Support 12 | 13 | ## How to file issues and get help 14 | 15 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing 16 | issues before filing new issues to avoid duplicates. For new issues, file your bug or 17 | feature request as a new Issue. 18 | 19 | For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE 20 | FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER 21 | CHANNEL. WHERE WILL YOU HELP PEOPLE?**. 22 | 23 | ## Microsoft Support Policy 24 | 25 | Support for this **PROJECT or PRODUCT** is limited to the resources listed above. 26 | -------------------------------------------------------------------------------- /Synapse/lab1/lab1-sqlpool-perf-optimization-part-2.md: -------------------------------------------------------------------------------- 1 | # SQL Pool Performance Optimization Part 2 2 | 3 | ## Lab prerequisite 4 | 5 | Start the SQL Pool in your lab environment. 6 | 7 | 1. Open the Synapse Studio workspace and navigate to the **Manage** hub. 8 | 9 | ![The Manage menu item is highlighted.](media/manage-hub.png "Manage hub") 10 | 11 | 2. From the center menu, select **SQL pools** from beneath the **Analytics pools** heading. Locate `SQLPool01`, and select the **Resume** button. 12 | 13 | ![The Manage menu item is selected, with SQL pools selected from the center menu. The resume button is selected next to the SQLPool01 item.](media/resume-sql-pool.png "SQL pools listing") 14 | 15 | ## Exercise 1 - Check for skewed data and space usage 16 | 17 | ### Task 1 - Analyze the space used by tables 18 | 19 | 1. Run the following DBCC command: 20 | 21 | ```sql 22 | DBCC PDW_SHOWSPACEUSED('wwi_perf.Sale_Hash'); 23 | ``` 24 | 25 | ![Show table space usage](./media/lab3_table_space_usage.png) 26 | 27 | 2. Analyze the number of rows in each distribution. Those numbers should be as even as possible. You can see from the results that rows are equally distributed across distributions. Let's dive a bit more into this analysis. Use the following query to get customers with the most sale transaction items: 28 | 29 | ```sql 30 | SELECT TOP 1000 31 | CustomerId, 32 | count(*) as TransactionItemsCount 33 | FROM 34 | [wwi_perf].[Sale_Hash] 35 | GROUP BY 36 | CustomerId 37 | ORDER BY 38 | count(*) DESC 39 | ``` 40 | 41 | ![Customers with most sale transaction items](./media/lab4_data_skew_1.png) 42 | 43 | Now find the customers with the least sale transaction items: 44 | 45 | ```sql 46 | SELECT TOP 1000 47 | CustomerId, 48 | count(*) as TransactionItemsCount 49 | FROM 50 | [wwi_perf].[Sale_Hash] 51 | GROUP BY 52 | CustomerId 53 | ORDER BY 54 | count(*) ASC 55 | ``` 56 | 57 | ![Customers with most sale transaction items](./media/lab4_data_skew_2.png) 58 | 59 | Notice the largest number of transaction items is 69 and the smallest is 16. 60 | 61 | Let's find now the distribution of per-customer transaction item counts. Run the following query: 62 | 63 | ```sql 64 | SELECT 65 | T.TransactionItemsCountBucket 66 | ,count(*) as CustomersCount 67 | FROM 68 | ( 69 | SELECT 70 | CustomerId, 71 | (count(*) - 16) / 100 as TransactionItemsCountBucket 72 | FROM 73 | [wwi_perf].[Sale_Hash] 74 | GROUP BY 75 | CustomerId 76 | ) T 77 | GROUP BY 78 | T.TransactionItemsCountBucket 79 | ORDER BY 80 | T.TransactionItemsCountBucket 81 | ``` 82 | 83 | In the `Results` pane, switch to the `Chart` view and configure it as follows (see the options set on the right side): 84 | 85 | ![Distribution of per-customer transaction item counts](./media/lab4_transaction_items_count_distribution.png) 86 | 87 | Without diving too much into the mathematical and statistical aspects of it, this histogram displays the reason why there is virtually no skew in the data distribution of the `Sale_Hash` table. If you haven't figured it out yet, the reason we are talking about is the quasi-normal distribution of the per-customer transaction items counts. 88 | 89 | ### Task 2 - Use a more advanced approach to understand table space usage 90 | 91 | 1. Run the following script to create the `vTableSizes` view: 92 | 93 | ```sql 94 | CREATE VIEW [wwi_perf].[vTableSizes] 95 | AS 96 | WITH base 97 | AS 98 | ( 99 | SELECT 100 | GETDATE() AS [execution_time] 101 | , DB_NAME() AS [database_name] 102 | , s.name AS [schema_name] 103 | , t.name AS [table_name] 104 | , QUOTENAME(s.name)+'.'+QUOTENAME(t.name) AS [two_part_name] 105 | , nt.[name] AS [node_table_name] 106 | , ROW_NUMBER() OVER(PARTITION BY nt.[name] ORDER BY (SELECT NULL)) AS [node_table_name_seq] 107 | , tp.[distribution_policy_desc] AS [distribution_policy_name] 108 | , c.[name] AS [distribution_column] 109 | , nt.[distribution_id] AS [distribution_id] 110 | , i.[type] AS [index_type] 111 | , i.[type_desc] AS [index_type_desc] 112 | , nt.[pdw_node_id] AS [pdw_node_id] 113 | , pn.[type] AS [pdw_node_type] 114 | , pn.[name] AS [pdw_node_name] 115 | , di.name AS [dist_name] 116 | , di.position AS [dist_position] 117 | , nps.[partition_number] AS [partition_nmbr] 118 | , nps.[reserved_page_count] AS [reserved_space_page_count] 119 | , nps.[reserved_page_count] - nps.[used_page_count] AS [unused_space_page_count] 120 | , nps.[in_row_data_page_count] 121 | + nps.[row_overflow_used_page_count] 122 | + nps.[lob_used_page_count] AS [data_space_page_count] 123 | , nps.[reserved_page_count] 124 | - (nps.[reserved_page_count] - nps.[used_page_count]) 125 | - ([in_row_data_page_count] 126 | + [row_overflow_used_page_count]+[lob_used_page_count]) AS [index_space_page_count] 127 | , nps.[row_count] AS [row_count] 128 | FROM 129 | sys.schemas s 130 | INNER JOIN sys.tables t 131 | ON s.[schema_id] = t.[schema_id] 132 | INNER JOIN sys.indexes i 133 | ON t.[object_id] = i.[object_id] 134 | AND i.[index_id] <= 1 135 | INNER JOIN sys.pdw_table_distribution_properties tp 136 | ON t.[object_id] = tp.[object_id] 137 | INNER JOIN sys.pdw_table_mappings tm 138 | ON t.[object_id] = tm.[object_id] 139 | INNER JOIN sys.pdw_nodes_tables nt 140 | ON tm.[physical_name] = nt.[name] 141 | INNER JOIN sys.dm_pdw_nodes pn 142 | ON nt.[pdw_node_id] = pn.[pdw_node_id] 143 | INNER JOIN sys.pdw_distributions di 144 | ON nt.[distribution_id] = di.[distribution_id] 145 | INNER JOIN sys.dm_pdw_nodes_db_partition_stats nps 146 | ON nt.[object_id] = nps.[object_id] 147 | AND nt.[pdw_node_id] = nps.[pdw_node_id] 148 | AND nt.[distribution_id] = nps.[distribution_id] 149 | LEFT OUTER JOIN (select * from sys.pdw_column_distribution_properties where distribution_ordinal = 1) cdp 150 | ON t.[object_id] = cdp.[object_id] 151 | LEFT OUTER JOIN sys.columns c 152 | ON cdp.[object_id] = c.[object_id] 153 | AND cdp.[column_id] = c.[column_id] 154 | WHERE pn.[type] = 'COMPUTE' 155 | ) 156 | , size 157 | AS 158 | ( 159 | SELECT 160 | [execution_time] 161 | , [database_name] 162 | , [schema_name] 163 | , [table_name] 164 | , [two_part_name] 165 | , [node_table_name] 166 | , [node_table_name_seq] 167 | , [distribution_policy_name] 168 | , [distribution_column] 169 | , [distribution_id] 170 | , [index_type] 171 | , [index_type_desc] 172 | , [pdw_node_id] 173 | , [pdw_node_type] 174 | , [pdw_node_name] 175 | , [dist_name] 176 | , [dist_position] 177 | , [partition_nmbr] 178 | , [reserved_space_page_count] 179 | , [unused_space_page_count] 180 | , [data_space_page_count] 181 | , [index_space_page_count] 182 | , [row_count] 183 | , ([reserved_space_page_count] * 8.0) AS [reserved_space_KB] 184 | , ([reserved_space_page_count] * 8.0)/1000 AS [reserved_space_MB] 185 | , ([reserved_space_page_count] * 8.0)/1000000 AS [reserved_space_GB] 186 | , ([reserved_space_page_count] * 8.0)/1000000000 AS [reserved_space_TB] 187 | , ([unused_space_page_count] * 8.0) AS [unused_space_KB] 188 | , ([unused_space_page_count] * 8.0)/1000 AS [unused_space_MB] 189 | , ([unused_space_page_count] * 8.0)/1000000 AS [unused_space_GB] 190 | , ([unused_space_page_count] * 8.0)/1000000000 AS [unused_space_TB] 191 | , ([data_space_page_count] * 8.0) AS [data_space_KB] 192 | , ([data_space_page_count] * 8.0)/1000 AS [data_space_MB] 193 | , ([data_space_page_count] * 8.0)/1000000 AS [data_space_GB] 194 | , ([data_space_page_count] * 8.0)/1000000000 AS [data_space_TB] 195 | , ([index_space_page_count] * 8.0) AS [index_space_KB] 196 | , ([index_space_page_count] * 8.0)/1000 AS [index_space_MB] 197 | , ([index_space_page_count] * 8.0)/1000000 AS [index_space_GB] 198 | , ([index_space_page_count] * 8.0)/1000000000 AS [index_space_TB] 199 | FROM base 200 | ) 201 | SELECT * 202 | FROM size 203 | ``` 204 | 205 | Take a moment to analyze the script above. You have encountered already some of the tables in the previous lab. Here is a short description of the tables and DMVs involved in the query: 206 | 207 | Table Name | Description 208 | ---|--- 209 | sys.schemas | All schemas in the database. 210 | sys.tables | All tables in the database. 211 | sys.indexes | All indexes in the database. 212 | sys.columns | All columns in the database. 213 | sys.pdw_table_mappings | Maps each table to local tables on physical nodes and distributions. 214 | sys.pdw_nodes_tables | Contains information on each local table in each distribution. 215 | sys.pdw_table_distribution_properties | Holds distribution information for tables (the type of distribution tables have). 216 | sys.pdw_column_distribution_properties | Holds distribution information for columns. Filtered to include only columns used to distribute their parent tables (`distribution_ordinal` = 1). 217 | sys.pdw_distributions | Holds information about the distributions from the SQL pool. 218 | sys.dm_pdw_nodes | Holds information about the nodes from the SQL pool. Filtered to include only compute nodes (`type` = `COMPUTE`). 219 | sys.dm_pdw_nodes_db_partition_stats | Returns page and row-count information for every partition in the current database. 220 | 221 | 2. Run the following script to view the details about the structure of the tables in the `wwi_perf` schema: 222 | 223 | ```sql 224 | SELECT 225 | database_name 226 | , schema_name 227 | , table_name 228 | , distribution_policy_name 229 | , distribution_column 230 | , index_type_desc 231 | , COUNT(distinct partition_nmbr) as nbr_partitions 232 | , SUM(row_count) as table_row_count 233 | , SUM(reserved_space_GB) as table_reserved_space_GB 234 | , SUM(data_space_GB) as table_data_space_GB 235 | , SUM(index_space_GB) as table_index_space_GB 236 | , SUM(unused_space_GB) as table_unused_space_GB 237 | FROM 238 | [wwi_perf].[vTableSizes] 239 | WHERE 240 | schema_name = 'wwi_perf' 241 | GROUP BY 242 | database_name 243 | , schema_name 244 | , table_name 245 | , distribution_policy_name 246 | , distribution_column 247 | , index_type_desc 248 | ORDER BY 249 | table_reserved_space_GB desc 250 | ``` 251 | 252 | Analyze the results: 253 | 254 | ![Detailed table space usage](./media/lab4_table_space.png) 255 | 256 | Notice the significant difference between the space used by `CLUSTERED COLUMNSTORE` and `HEAP` or `CLUSTERED` tables. This provides a clear indication on the significant advantages columnstore indexes have. 257 | 258 | Also notice the slight decrease of storage space for ordered CCI table (`Sale_Hash_Ordered`). 259 | 260 | ## Exercise 2 - Understand column store storage details 261 | 262 | ### Task 1 - Create view for column store row group stats 263 | 264 | 1. Run the following query to create the `vColumnStoreRowGroupStats`: 265 | 266 | ```sql 267 | create view [wwi_perf].[vColumnStoreRowGroupStats] 268 | as 269 | with cte 270 | as 271 | ( 272 | select tb.[name] AS [logical_table_name] 273 | , rg.[row_group_id] AS [row_group_id] 274 | , rg.[state] AS [state] 275 | , rg.[state_desc] AS [state_desc] 276 | , rg.[total_rows] AS [total_rows] 277 | , rg.[trim_reason_desc] AS trim_reason_desc 278 | , mp.[physical_name] AS physical_name 279 | FROM sys.[schemas] sm 280 | JOIN sys.[tables] tb ON sm.[schema_id] = tb.[schema_id] 281 | JOIN sys.[pdw_table_mappings] mp ON tb.[object_id] = mp.[object_id] 282 | JOIN sys.[pdw_nodes_tables] nt ON nt.[name] = mp.[physical_name] 283 | JOIN sys.[dm_pdw_nodes_db_column_store_row_group_physical_stats] rg ON rg.[object_id] = nt.[object_id] 284 | AND rg.[pdw_node_id] = nt.[pdw_node_id] 285 | AND rg.[distribution_id] = nt.[distribution_id] 286 | ) 287 | select * 288 | from cte; 289 | ``` 290 | 291 | In this query we are using the `sys.dm_pdw_nodes_db_column_store_row_group_physical_stats` DMV which provides current rowgroup-level information about all of the columnstore indexes in the current database. 292 | 293 | The `state_desc` column provides useful information on the state of a row group: 294 | 295 | Name | Description 296 | ---|--- 297 | `INVISIBLE` | A rowgroup which is being compressed. 298 | `OPEN` | A deltastore rowgroup that is accepting new rows. It is important to remember that an open rowgroup is still in rowstore format and has not been compressed to columnstore format. 299 | `CLOSED` | A deltastore rowgroup that contains the maximum number of rows, and is waiting for the tuple mover process to compress it to the columnstore. 300 | `COMPRESSED` | A row group that is compressed with columnstore compression and stored in the columnstore. 301 | `TOMBSTONE` | A row group that was formerly in the deltastore and is no longer used. 302 | 303 | The `trim_reason_desc` column describes the reason that triggered the `COMPRESSED` rowgroup to have less than the maximum number of rows: 304 | 305 | Name | Description 306 | ---|--- 307 | `UNKNOWN_UPGRADED_FROM_PREVIOUS_VERSION` | Occurred when upgrading from the previous version of SQL Server. 308 | `NO_TRIM` | The row group was not trimmed. The row group was compressed with the maximum of 1,048,476 rows. The number of rows could be less if a subset of rows was deleted after delta rowgroup was closed. 309 | `BULKLOAD` | The bulk-load batch size limited the number of rows. This is what you should be looking for when optimizing data loading, as it is an indicator of resource starvation during the loading process. 310 | `REORG` | Forced compression as part of REORG command. 311 | `DICTIONARY_SIZE` | Dictionary size grew too large to compress all of the rows together. 312 | `MEMORY_LIMITATION` | Not enough available memory to compress all the rows together. 313 | `RESIDUAL_ROW_GROUP` | Closed as part of last row group with rows < 1 million during index build operation. 314 | 315 | ### Task 2 - Explore column store storage details 316 | 317 | 1. Explore the statistics of the columnstore for the `Sale_Partition01` table using the following query: 318 | 319 | ```sql 320 | SELECT 321 | * 322 | FROM 323 | [wwi_perf].[vColumnStoreRowGroupStats] 324 | WHERE 325 | Logical_Table_Name = 'Sale_Partition01' 326 | ``` 327 | 328 | 2. Explore the results of the query: 329 | 330 | ![Column store row group statistics for Sale_Partition01](./media/lab4_column_store_row_groups.png) 331 | 332 | Browse through the results and get an overview of the rowgroup states. Notice the `COMPRESSED` and `OPEN` states of some of the row groups. 333 | 334 | 3. Explore the statistics of the columnstore for the `Sale_Hash_Ordered` table using the same query: 335 | 336 | ```sql 337 | SELECT 338 | * 339 | FROM 340 | [wwi_perf].[vColumnStoreRowGroupStats] 341 | WHERE 342 | Logical_Table_Name = 'Sale_Hash_Ordered' 343 | ``` 344 | 345 | 4. Explore the results of the query: 346 | 347 | ![Column store row group statistics for Sale_Hash_Ordered](./media/lab4_column_store_row_groups_2.png) 348 | 349 | There is a significant difference in the rowgroup states from the previous one. This highlights one of the potential advantages of ordered CCIs. 350 | 351 | ## Exercise 3 - Study the impact of materialized views 352 | 353 | ### Task 1 - Analyze the execution plan of a query 354 | 355 | 1. Run again the query to find the number of customers in each bucket of per-customer transaction items counts: 356 | 357 | ```sql 358 | SELECT 359 | T.TransactionItemsCountBucket 360 | ,count(*) as CustomersCount 361 | FROM 362 | ( 363 | SELECT 364 | CustomerId, 365 | (count(*) - 184) / 100 as TransactionItemsCountBucket 366 | FROM 367 | [wwi_perf].[Sale_Hash] 368 | GROUP BY 369 | CustomerId 370 | ) T 371 | GROUP BY 372 | T.TransactionItemsCountBucket 373 | ORDER BY 374 | T.TransactionItemsCountBucket 375 | ``` 376 | 377 | 2. Improve the query by adding support to calculate the lower margin of the first per-customer transactions items count bucket: 378 | 379 | ```sql 380 | SELECT 381 | T.TransactionItemsCountBucket 382 | ,count(*) as CustomersCount 383 | FROM 384 | ( 385 | SELECT 386 | CustomerId, 387 | ( 388 | COUNT(*) - 389 | ( 390 | SELECT 391 | MIN(TransactionItemsCount) 392 | FROM 393 | ( 394 | SELECT 395 | COUNT(*) as TransactionItemsCount 396 | FROM 397 | [wwi_perf].[Sale_Hash] 398 | GROUP BY 399 | CustomerId 400 | ) X 401 | ) 402 | ) / 100 as TransactionItemsCountBucket 403 | FROM 404 | [wwi_perf].[Sale_Hash] 405 | GROUP BY 406 | CustomerId 407 | ) T 408 | GROUP BY 409 | T.TransactionItemsCountBucket 410 | ORDER BY 411 | T.TransactionItemsCountBucket 412 | ``` 413 | 414 | ### Task 2 - Improve the execution plan of the query with a materialized view 415 | 416 | 1. Run the query with the `EXPLAIN` directive (note the `WITH_RECOMMENDATIONS` option as well): 417 | 418 | ```sql 419 | EXPLAIN WITH_RECOMMENDATIONS 420 | SELECT 421 | T.TransactionItemsCountBucket 422 | ,count(*) as CustomersCount 423 | FROM 424 | ( 425 | SELECT 426 | CustomerId, 427 | ( 428 | COUNT(*) - 429 | ( 430 | SELECT 431 | MIN(TransactionItemsCount) 432 | FROM 433 | ( 434 | SELECT 435 | COUNT(*) as TransactionItemsCount 436 | FROM 437 | [wwi_perf].[Sale_Hash] 438 | GROUP BY 439 | CustomerId 440 | ) X 441 | ) 442 | ) / 100 as TransactionItemsCountBucket 443 | FROM 444 | [wwi_perf].[Sale_Hash] 445 | GROUP BY 446 | CustomerId 447 | ) T 448 | GROUP BY 449 | T.TransactionItemsCountBucket 450 | ORDER BY 451 | T.TransactionItemsCountBucket 452 | ``` 453 | 454 | 2. Analyze the resulting execution plan. Take a close look to the `` section which suggests possible materialized views you can create to improve the performance of the query. 455 | 456 | ```xml 457 | 458 | 459 | SELECT 460 | T.TransactionItemsCountBucket 461 | ,count(*) as CustomersCount 462 | FROM 463 | ( 464 | SELECT 465 | CustomerId, 466 | ( 467 | COUNT(*) - 468 | ( 469 | SELECT 470 | MIN(TransactionItemsCount) 471 | FROM 472 | ( 473 | SELECT 474 | COUNT(*) as TransactionItemsCount 475 | FROM 476 | [wwi_perf].[Sale_Hash] 477 | GROUP BY 478 | CustomerId 479 | ) X 480 | ) 481 | ) / 100 as TransactionItemsCountBucket 482 | FROM 483 | [wwi_perf].[Sale_Hash] 484 | GROUP BY 485 | CustomerId 486 | ) T 487 | GROUP BY 488 | T.TransactionItemsCountBucket 489 | ORDER BY 490 | T.TransactionItemsCountBucket 491 | 492 | CREATE MATERIALIZED VIEW View1 WITH (DISTRIBUTION = HASH([Expr0])) AS 493 | SELECT [SQLPool01].[wwi_perf].[Sale_Hash].[CustomerId] AS [Expr0], 494 | COUNT(*) AS [Expr1] 495 | FROM [wwi_perf].[Sale_Hash] 496 | GROUP BY [SQLPool01].[wwi_perf].[Sale_Hash].[CustomerId] 497 | 498 | 499 | 500 | TEMP_ID_99 501 | 502 | 503 | 504 | 505 | CREATE TABLE [qtabledb].[dbo].[TEMP_ID_99] ([col] INT ) WITH(DISTRIBUTED_MOVE_FILE=''); 506 | 507 | 508 | 509 | 510 | SELECT [T1_1].[col] AS [col] FROM (SELECT MIN([T2_1].[col]) AS [col] FROM (SELECT COUNT(CAST ((0) AS INT)) AS [col], 0 AS [col1] FROM [SQLPool01].[wwi_perf].[Sale_Hash] AS T3_1 GROUP BY [T3_1].[CustomerId]) AS T2_1 GROUP BY [T2_1].[col1]) AS T1_1 511 | OPTION (MAXDOP 6, MIN_GRANT_PERCENT = [MIN_GRANT], DISTRIBUTED_MOVE(N'')) 512 | [TEMP_ID_99] 513 | 514 | 515 | TEMP_ID_100 516 | 517 | 518 | 519 | 520 | CREATE TABLE [qtabledb].[dbo].[TEMP_ID_100] ([col] INT, [col1] BIGINT ) WITH(DISTRIBUTED_MOVE_FILE=''); 521 | 522 | 523 | 524 | 525 | SELECT [T1_1].[col1] AS [col], [T1_1].[col] AS [col1] FROM (SELECT COUNT_BIG(CAST ((0) AS INT)) AS [col], [T2_1].[col] AS [col1] FROM (SELECT (([T3_2].[col] - [T3_1].[col]) / CAST ((100) AS INT)) AS [col] FROM (SELECT MIN([T4_1].[col]) AS [col] FROM [qtabledb].[dbo].[TEMP_ID_99] AS T4_1) AS T3_1 INNER JOIN 526 | (SELECT COUNT(CAST ((0) AS INT)) AS [col] FROM [SQLPool01].[wwi_perf].[Sale_Hash] AS T4_1 GROUP BY [T4_1].[CustomerId]) AS T3_2 527 | ON (0 = 0)) AS T2_1 GROUP BY [T2_1].[col]) AS T1_1 528 | OPTION (MAXDOP 6, MIN_GRANT_PERCENT = [MIN_GRANT], DISTRIBUTED_MOVE(N'')) 529 | [TEMP_ID_100] 530 | col; 531 | 532 | 533 | 534 | 536 | 537 | 538 | 539 | 540 | DROP TABLE [qtabledb].[dbo].[TEMP_ID_100] 541 | 542 | 543 | 544 | 545 | 546 | DROP TABLE [qtabledb].[dbo].[TEMP_ID_99] 547 | 548 | 549 | 550 | 551 | ``` 552 | 553 | 3. Create the suggested materialized view: 554 | 555 | ```sql 556 | CREATE MATERIALIZED VIEW 557 | mvTransactionItemsCounts 558 | WITH 559 | ( 560 | DISTRIBUTION = HASH([CustomerId]) 561 | ) 562 | AS 563 | SELECT 564 | CustomerId 565 | ,COUNT(*) AS ItemsCount 566 | FROM 567 | [wwi_perf].[Sale_Hash] 568 | GROUP BY 569 | CustomerId 570 | ``` 571 | 572 | 4. Check the execution plan again: 573 | 574 | ```sql 575 | EXPLAIN WITH_RECOMMENDATIONS 576 | SELECT 577 | T.TransactionItemsCountBucket 578 | ,count(*) as CustomersCount 579 | FROM 580 | ( 581 | SELECT 582 | CustomerId, 583 | ( 584 | COUNT(*) - 585 | ( 586 | SELECT 587 | MIN(TransactionItemsCount) 588 | FROM 589 | ( 590 | SELECT 591 | COUNT(*) as TransactionItemsCount 592 | FROM 593 | [wwi_perf].[Sale_Hash] 594 | GROUP BY 595 | CustomerId 596 | ) X 597 | ) 598 | ) / 100 as TransactionItemsCountBucket 599 | FROM 600 | [wwi_perf].[Sale_Hash] 601 | GROUP BY 602 | CustomerId 603 | ) T 604 | GROUP BY 605 | T.TransactionItemsCountBucket 606 | ORDER BY 607 | T.TransactionItemsCountBucket 608 | ``` 609 | 610 | The resulting execution plan indicates now the use of the `mvTransactionItemsCounts` (the `BROADCAST_MOVE` distributed SQL operation) materialized view which provides improvements to the query execution time: 611 | 612 | ```xml 613 | 614 | 615 | SELECT 616 | T.TransactionItemsCountBucket 617 | ,count(*) as CustomersCount 618 | FROM 619 | ( 620 | SELECT 621 | CustomerId, 622 | ( 623 | COUNT(*) - 624 | ( 625 | SELECT 626 | MIN(TransactionItemsCount) 627 | FROM 628 | ( 629 | SELECT 630 | COUNT(*) as TransactionItemsCount 631 | FROM 632 | [wwi_perf].[Sale_Hash] 633 | GROUP BY 634 | CustomerId 635 | ) X 636 | ) 637 | ) / 100 as TransactionItemsCountBucket 638 | FROM 639 | [wwi_perf].[Sale_Hash] 640 | GROUP BY 641 | CustomerId 642 | ) T 643 | GROUP BY 644 | T.TransactionItemsCountBucket 645 | ORDER BY 646 | T.TransactionItemsCountBucket 647 | 648 | CREATE MATERIALIZED VIEW View1 WITH (DISTRIBUTION = HASH([Expr0])) AS 649 | SELECT [SQLPool01].[wwi_perf].[Sale_Hash].[CustomerId] AS [Expr0], 650 | COUNT(*) AS [Expr1] 651 | FROM [wwi_perf].[Sale_Hash] 652 | GROUP BY [SQLPool01].[wwi_perf].[Sale_Hash].[CustomerId] 653 | 654 | 655 | 656 | TEMP_ID_111 657 | 658 | 659 | 660 | 661 | CREATE TABLE [qtabledb].[dbo].[TEMP_ID_111] ([col] INT ) WITH(DISTRIBUTED_MOVE_FILE=''); 662 | 663 | 664 | 665 | 666 | SELECT [T1_1].[col] AS [col] FROM (SELECT MIN([T2_1].[col]) AS [col] FROM (SELECT CONVERT (INT, [T3_1].[col], 0) AS [col], 0 AS [col1] FROM (SELECT ISNULL([T4_1].[col], CONVERT (BIGINT, 0, 0)) AS [col] FROM (SELECT SUM([T5_1].[ItemsCount]) AS [col] FROM (SELECT [T6_1].[CustomerId] AS [CustomerId], [T6_1].[ItemsCount] AS [ItemsCount] FROM [SQLPool01].[dbo].[mvTransactionItemsCounts] AS T6_1) AS T5_1 GROUP BY [T5_1].[CustomerId]) AS T4_1) AS T3_1 WHERE ([T3_1].[col] != CAST ((0) AS BIGINT))) AS T2_1 GROUP BY [T2_1].[col1]) AS T1_1 667 | OPTION (MAXDOP 6, MIN_GRANT_PERCENT = [MIN_GRANT], DISTRIBUTED_MOVE(N'')) 668 | [TEMP_ID_111] 669 | 670 | 671 | TEMP_ID_112 672 | 673 | 674 | 675 | 676 | CREATE TABLE [qtabledb].[dbo].[TEMP_ID_112] ([col] INT, [col1] BIGINT ) WITH(DISTRIBUTED_MOVE_FILE=''); 677 | 678 | 679 | 680 | 681 | SELECT [T1_1].[col1] AS [col], [T1_1].[col] AS [col1] FROM (SELECT COUNT_BIG(CAST ((0) AS INT)) AS [col], [T2_1].[col] AS [col1] FROM (SELECT (([T3_2].[col] - [T3_1].[col]) / CAST ((100) AS INT)) AS [col] FROM (SELECT MIN([T4_1].[col]) AS [col] FROM [qtabledb].[dbo].[TEMP_ID_111] AS T4_1) AS T3_1 INNER JOIN 682 | (SELECT CONVERT (INT, [T4_1].[col], 0) AS [col] FROM (SELECT ISNULL([T5_1].[col], CONVERT (BIGINT, 0, 0)) AS [col] FROM (SELECT SUM([T6_1].[ItemsCount]) AS [col] FROM (SELECT [T7_1].[CustomerId] AS [CustomerId], [T7_1].[ItemsCount] AS [ItemsCount] FROM [SQLPool01].[dbo].[mvTransactionItemsCounts] AS T7_1) AS T6_1 GROUP BY [T6_1].[CustomerId]) AS T5_1) AS T4_1 WHERE ([T4_1].[col] != CAST ((0) AS BIGINT))) AS T3_2 683 | ON (0 = 0)) AS T2_1 GROUP BY [T2_1].[col]) AS T1_1 684 | OPTION (MAXDOP 6, MIN_GRANT_PERCENT = [MIN_GRANT], DISTRIBUTED_MOVE(N'')) 685 | [TEMP_ID_112] 686 | col; 687 | 688 | 689 | 690 | 692 | 693 | 694 | 695 | 696 | DROP TABLE [qtabledb].[dbo].[TEMP_ID_112] 697 | 698 | 699 | 700 | 701 | 702 | DROP TABLE [qtabledb].[dbo].[TEMP_ID_111] 703 | 704 | 705 | 706 | 707 | ``` 708 | 709 | ## Exercise 4 - Avoid extensive logging 710 | 711 | ### Task 1 - Explore rules for minimally logged operations 712 | 713 | The following operations are capable of being minimally logged: 714 | 715 | - CREATE TABLE AS SELECT (CTAS) 716 | - INSERT..SELECT 717 | - CREATE INDEX 718 | - ALTER INDEX REBUILD 719 | - DROP INDEX 720 | - TRUNCATE TABLE 721 | - DROP TABLE 722 | - ALTER TABLE SWITCH PARTITION 723 | 724 | **Minimal logging with bulk load** 725 | 726 | CTAS and INSERT...SELECT are both bulk load operations. However, both are influenced by the target table definition and depend on the load scenario. The following table explains when bulk operations are fully or minimally logged: 727 | 728 | | Primary Index | Load Scenario | Logging Mode | 729 | | --- | --- | --- | 730 | | Heap |Any |**Minimal** | 731 | | Clustered Index |Empty target table |**Minimal** | 732 | | Clustered Index |Loaded rows do not overlap with existing pages in target |**Minimal** | 733 | | Clustered Index |Loaded rows overlap with existing pages in target |Full | 734 | | Clustered Columnstore Index |Batch size >= 102,400 per partition aligned distribution |**Minimal** | 735 | | Clustered Columnstore Index |Batch size < 102,400 per partition aligned distribution |Full | 736 | 737 | It is worth noting that any writes to update secondary or non-clustered indexes will always be fully logged operations. 738 | 739 | > **IMPORTANT** 740 | > 741 | > A Synapse Analytics SQL pool has 60 distributions. Therefore, assuming all rows are evenly distributed and landing in a single partition, your batch will need to contain 6,144,000 rows or larger to be minimally logged when writing to a Clustered Columnstore Index. If the table is partitioned and the rows being inserted span partition boundaries, then you will need 6,144,000 rows per partition boundary assuming even data distribution. Each partition in each distribution must independently exceed the 102,400 row threshold for the insert to be minimally logged into the distribution. 742 | 743 | Loading data into a non-empty table with a clustered index can often contain a mixture of fully logged and minimally logged rows. A clustered index is a balanced tree (b-tree) of pages. If the page being written to already contains rows from another transaction, then these writes will be fully logged. However, if the page is empty then the write to that page will be minimally logged. 744 | 745 | ### Task 2 - Optimizing a delete operation 746 | 747 | 1. Check the number of transaction items for customers with ids lower than 900000 using the following query: 748 | 749 | ```sql 750 | SELECT 751 | COUNT_BIG(*) as TransactionItemsCount 752 | FROM 753 | [wwi_perf].[Sale_Hash] 754 | WHERE 755 | CustomerId < 900000 756 | ``` 757 | 758 | 2. Implement a minimal logging approach to delete transaction items for customers with ids lower than 900000. Use the following CTAS query to isolate the transaction items that should be kept: 759 | 760 | ```sql 761 | CREATE TABLE [wwi_perf].[Sale_Hash_v2] 762 | WITH 763 | ( 764 | DISTRIBUTION = ROUND_ROBIN, 765 | HEAP 766 | ) 767 | AS 768 | SELECT 769 | * 770 | FROM 771 | [wwi_perf].[Sale_Hash] 772 | WHERE 773 | CustomerId >= 900000 774 | ``` 775 | 776 | The query should execute within a few minutes. All that would remain to complete the process would be to delete the `Sale_Heap` table and rename `Sale_Heap_v2` to `Sale_Heap`. 777 | 778 | 3. Compare the previous operation with a classical delete: 779 | 780 | ```sql 781 | DELETE 782 | [wwi_perf].[Sale_Hash] 783 | WHERE 784 | CustomerId < 900000 785 | ``` 786 | 787 | >**Note** 788 | > 789 | >The query will run for a potentially long time. Once the time exceeds significantly the time to run the previous CTAS query, you can cancel it (as you can already see the benefit of the CTAS-based approach). 790 | -------------------------------------------------------------------------------- /Synapse/lab1/media/lab3_materialized_view_rebuilt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab1/media/lab3_materialized_view_rebuilt.png -------------------------------------------------------------------------------- /Synapse/lab1/media/lab3_materialized_view_updated.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab1/media/lab3_materialized_view_updated.png -------------------------------------------------------------------------------- /Synapse/lab1/media/lab3_ordered_cci.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab1/media/lab3_ordered_cci.png -------------------------------------------------------------------------------- /Synapse/lab1/media/lab3_ordered_cci_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab1/media/lab3_ordered_cci_2.png -------------------------------------------------------------------------------- /Synapse/lab1/media/lab3_query_id.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab1/media/lab3_query_id.png -------------------------------------------------------------------------------- /Synapse/lab1/media/lab3_result_set_cache_build.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab1/media/lab3_result_set_cache_build.png -------------------------------------------------------------------------------- /Synapse/lab1/media/lab3_result_set_cache_off.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab1/media/lab3_result_set_cache_off.png -------------------------------------------------------------------------------- /Synapse/lab1/media/lab3_result_set_cache_size.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab1/media/lab3_result_set_cache_size.png -------------------------------------------------------------------------------- /Synapse/lab1/media/lab3_result_set_caching_db.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab1/media/lab3_result_set_caching_db.png -------------------------------------------------------------------------------- /Synapse/lab1/media/lab3_shuffle_move.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab1/media/lab3_shuffle_move.png -------------------------------------------------------------------------------- /Synapse/lab1/media/lab3_shuffle_move_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab1/media/lab3_shuffle_move_2.png -------------------------------------------------------------------------------- /Synapse/lab1/media/lab3_shuffle_move_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab1/media/lab3_shuffle_move_3.png -------------------------------------------------------------------------------- /Synapse/lab1/media/lab3_shuffle_move_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab1/media/lab3_shuffle_move_4.png -------------------------------------------------------------------------------- /Synapse/lab1/media/lab3_statistics_automated.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab1/media/lab3_statistics_automated.png -------------------------------------------------------------------------------- /Synapse/lab1/media/lab3_statistics_customerid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab1/media/lab3_statistics_customerid.png -------------------------------------------------------------------------------- /Synapse/lab1/media/lab3_table_space_usage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab1/media/lab3_table_space_usage.png -------------------------------------------------------------------------------- /Synapse/lab1/media/lab4_column_store_row_groups.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab1/media/lab4_column_store_row_groups.png -------------------------------------------------------------------------------- /Synapse/lab1/media/lab4_column_store_row_groups_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab1/media/lab4_column_store_row_groups_2.png -------------------------------------------------------------------------------- /Synapse/lab1/media/lab4_data_skew_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab1/media/lab4_data_skew_1.png -------------------------------------------------------------------------------- /Synapse/lab1/media/lab4_data_skew_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab1/media/lab4_data_skew_2.png -------------------------------------------------------------------------------- /Synapse/lab1/media/lab4_data_type_selection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab1/media/lab4_data_type_selection.png -------------------------------------------------------------------------------- /Synapse/lab1/media/lab4_table_space.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab1/media/lab4_table_space.png -------------------------------------------------------------------------------- /Synapse/lab1/media/lab4_transaction_items_count_distribution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab1/media/lab4_transaction_items_count_distribution.png -------------------------------------------------------------------------------- /Synapse/lab1/media/manage-hub.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab1/media/manage-hub.png -------------------------------------------------------------------------------- /Synapse/lab1/media/resume-sql-pool.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab1/media/resume-sql-pool.png -------------------------------------------------------------------------------- /Synapse/lab1/readme.md: -------------------------------------------------------------------------------- 1 | # Azure Synapse Analytics SQL Pool Performance Optimization 2 | 3 | ## Deploy and configure lab environment 4 | 5 | Before starting the lab exercises, follow the [setup instructions](../setup/readme.md) make sure you deployed and configured your lab environment properly. 6 | 7 | ## Run lab 8 | 9 | The lab has two parts, covering different aspects of SQL Pool performance optimization. 10 | 11 | [Part 1](./lab1-sqlpool-perf-optimization-part-1.md) covers the following topics: 12 | 13 | - Explore query performance and improve table structure 14 | - Improve query performance 15 | 16 | [Part 2](./lab1-sqlpool-perf-optimization-part-2.md) covers the following topics: 17 | 18 | - Check for skewed data and space usage 19 | - Understand column store storage details 20 | - Study the impact of materialized views 21 | - Avoid extensive loggings 22 | 23 | To run the lab, follow the step-by-step instructions accessible through the links above. 24 | 25 | 26 | -------------------------------------------------------------------------------- /Synapse/lab2/media/attach-spark-pool.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/attach-spark-pool.png -------------------------------------------------------------------------------- /Synapse/lab2/media/copy-pipeline-trigger-now.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/copy-pipeline-trigger-now.png -------------------------------------------------------------------------------- /Synapse/lab2/media/cosmos-dataset-preview-data-link.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/cosmos-dataset-preview-data-link.png -------------------------------------------------------------------------------- /Synapse/lab2/media/cosmos-db-dataset-preview-data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/cosmos-db-dataset-preview-data.png -------------------------------------------------------------------------------- /Synapse/lab2/media/cosmos-db-dataset-schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/cosmos-db-dataset-schema.png -------------------------------------------------------------------------------- /Synapse/lab2/media/create-adls-dataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/create-adls-dataset.png -------------------------------------------------------------------------------- /Synapse/lab2/media/create-cosmos-db-dataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/create-cosmos-db-dataset.png -------------------------------------------------------------------------------- /Synapse/lab2/media/create-cosmos-db-linked-service-step1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/create-cosmos-db-linked-service-step1.png -------------------------------------------------------------------------------- /Synapse/lab2/media/create-cosmos-db-linked-service.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/create-cosmos-db-linked-service.png -------------------------------------------------------------------------------- /Synapse/lab2/media/daily-sales-counts-chart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/daily-sales-counts-chart.png -------------------------------------------------------------------------------- /Synapse/lab2/media/data-hub-parquet-select-rows.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/data-hub-parquet-select-rows.png -------------------------------------------------------------------------------- /Synapse/lab2/media/data-hub.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/data-hub.png -------------------------------------------------------------------------------- /Synapse/lab2/media/develop-hub.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/develop-hub.png -------------------------------------------------------------------------------- /Synapse/lab2/media/getting-started.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/getting-started.png -------------------------------------------------------------------------------- /Synapse/lab2/media/manage-hub.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/manage-hub.png -------------------------------------------------------------------------------- /Synapse/lab2/media/monitor-hub.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/monitor-hub.png -------------------------------------------------------------------------------- /Synapse/lab2/media/new-cell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/new-cell.png -------------------------------------------------------------------------------- /Synapse/lab2/media/new-dataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/new-dataset.png -------------------------------------------------------------------------------- /Synapse/lab2/media/new-pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/new-pipeline.png -------------------------------------------------------------------------------- /Synapse/lab2/media/new-spark-notebook-sales.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/new-spark-notebook-sales.png -------------------------------------------------------------------------------- /Synapse/lab2/media/orchestrate-hub.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/orchestrate-hub.png -------------------------------------------------------------------------------- /Synapse/lab2/media/pipeline-copy-sales-drag-copy-data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/pipeline-copy-sales-drag-copy-data.png -------------------------------------------------------------------------------- /Synapse/lab2/media/pipeline-copy-sales-name.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/pipeline-copy-sales-name.png -------------------------------------------------------------------------------- /Synapse/lab2/media/pipeline-copy-sales-pipeline-run.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/pipeline-copy-sales-pipeline-run.png -------------------------------------------------------------------------------- /Synapse/lab2/media/pipeline-copy-sales-settings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/pipeline-copy-sales-settings.png -------------------------------------------------------------------------------- /Synapse/lab2/media/pipeline-copy-sales-sink-dataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/pipeline-copy-sales-sink-dataset.png -------------------------------------------------------------------------------- /Synapse/lab2/media/pipeline-copy-sales-sink-mapping.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/pipeline-copy-sales-sink-mapping.png -------------------------------------------------------------------------------- /Synapse/lab2/media/pipeline-copy-sales-sink-settings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/pipeline-copy-sales-sink-settings.png -------------------------------------------------------------------------------- /Synapse/lab2/media/pipeline-copy-sales-source-dataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/pipeline-copy-sales-source-dataset.png -------------------------------------------------------------------------------- /Synapse/lab2/media/publish-all-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/publish-all-1.png -------------------------------------------------------------------------------- /Synapse/lab2/media/rename-publish-sql-script.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/rename-publish-sql-script.png -------------------------------------------------------------------------------- /Synapse/lab2/media/resume-sql-pool.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/resume-sql-pool.png -------------------------------------------------------------------------------- /Synapse/lab2/media/select-workspace.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/select-workspace.png -------------------------------------------------------------------------------- /Synapse/lab2/media/sort-desc-error.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/sort-desc-error.png -------------------------------------------------------------------------------- /Synapse/lab2/media/spark-explode-output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/spark-explode-output.png -------------------------------------------------------------------------------- /Synapse/lab2/media/spark-grouped-top-purchases-total-items.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/spark-grouped-top-purchases-total-items.png -------------------------------------------------------------------------------- /Synapse/lab2/media/spark-grouped-top-purchases.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/spark-grouped-top-purchases.png -------------------------------------------------------------------------------- /Synapse/lab2/media/spark-json-output-nested.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/spark-json-output-nested.png -------------------------------------------------------------------------------- /Synapse/lab2/media/spark-toppurchases-output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/spark-toppurchases-output.png -------------------------------------------------------------------------------- /Synapse/lab2/media/sql-on-demand-selected.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/sql-on-demand-selected.png -------------------------------------------------------------------------------- /Synapse/lab2/media/sql-serverless-aggregates.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/sql-serverless-aggregates.png -------------------------------------------------------------------------------- /Synapse/lab2/media/sqlpool01-import01-linked-service.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/sqlpool01-import01-linked-service.png -------------------------------------------------------------------------------- /Synapse/lab2/media/synapse-studio-new-sql-script.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/synapse-studio-new-sql-script.png -------------------------------------------------------------------------------- /Synapse/lab2/media/synapse-studio-query-toolbar-connect.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/synapse-studio-query-toolbar-connect.png -------------------------------------------------------------------------------- /Synapse/lab2/media/synapse-studio-query-toolbar-run.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/synapse-studio-query-toolbar-run.png -------------------------------------------------------------------------------- /Synapse/lab2/media/workload-classifiers-query-results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/lab2/media/workload-classifiers-query-results.png -------------------------------------------------------------------------------- /Synapse/lab2/readme.md: -------------------------------------------------------------------------------- 1 | # Azure Synapse Analytics Data Integration 2 | 3 | ## Deploy and configure lab environment 4 | 5 | Before starting the lab exercises, follow the [setup instructions](../setup/readme.md) make sure you deployed and configured your lab environment properly. 6 | 7 | ## Run lab 8 | 9 | The lab covers various aspects of data integration: 10 | 11 | - Configure linked service and create datasets 12 | - Explore source data in the Data hub 13 | - Import sales data with PolyBase and COPY using T-SQL 14 | - Import sales data with COPY using a pipeline 15 | 16 | To run the lab, follow the [step-by-step instructions](./lab2-data-integration.md). -------------------------------------------------------------------------------- /Synapse/readme.md: -------------------------------------------------------------------------------- 1 | # Azure Synapse Analytics Labs 2 | 3 | The following labs are available for Azure Synapse Analytics: 4 | 5 | [Lab 1 - SQL Pool Performance Optimization](./lab1/readme.md), covers the following topics: 6 | 7 | - Explore query performance and improve table structure 8 | - Improve query performance 9 | - Check for skewed data and space usage 10 | - Understand column store storage details 11 | - Study the impact of materialized views 12 | - Avoid extensive loggings 13 | 14 | [Lab 2 - Data Integration](./lab2/readme.md), covers the following topics: 15 | 16 | - Configure linked service and create datasets 17 | - Explore source data in the Data hub 18 | - Import sales data with PolyBase and COPY using T-SQL 19 | - Import sales data with COPY using a pipeline 20 | -------------------------------------------------------------------------------- /Synapse/setup/automation/00-asa-workspace-core.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "uniqueSuffix": { 6 | "type": "String", 7 | "metadata": { 8 | "description": "Suffix added to all resource name to make them unique." 9 | } 10 | }, 11 | "sqlAdministratorLoginPassword": { 12 | "type": "String", 13 | "metadata": { 14 | "description": "Password for SQL Admin" 15 | } 16 | } 17 | }, 18 | "variables": { 19 | "location": "[resourceGroup().location]", 20 | "sqlAdministratorLogin": "asa.sql.admin", 21 | "workspaceName": "[concat('asaworkspace', parameters('uniqueSuffix'))]", 22 | "adlsStorageAccountName": "[concat('asadatalake', parameters('uniqueSuffix'))]", 23 | "defaultDataLakeStorageFilesystemName": "tempdata", 24 | "sqlComputeName": "SQLPool01", 25 | "computeSubnetId": "", 26 | "sqlServerSKU": "DW500c", 27 | "storageBlobDataContributorRoleID": "ba92f5b4-2d11-453d-a403-e96b0029c9fe", 28 | "storageBlobDataOwnerRoleID": "b7e6dc6d-f1e8-4753-8033-0f276bb0955b", 29 | "defaultDataLakeStorageAccountUrl": "[concat('https://', variables('adlsStorageAccountName'), '.dfs.core.windows.net')]", 30 | "keyVaultName": "[concat('asakeyvault', parameters('uniqueSuffix'))]", 31 | "blobStorageAccountName": "[concat('asastore', parameters('uniqueSuffix'))]", 32 | "applicationInsightsName": "[concat('asaappinsights', parameters('uniqueSuffix'))]", 33 | "cosmosDbAccountName": "[concat('asacosmosdb', parameters('uniqueSuffix'))]" 34 | }, 35 | "resources": [ 36 | { 37 | "type": "Microsoft.Resources/tags", 38 | "name": "default", 39 | "apiVersion": "2019-10-01", 40 | "dependsOn": [], 41 | "properties": { 42 | "tags": { 43 | "DeploymentId": "[parameters('uniqueSuffix')]" 44 | } 45 | } 46 | }, 47 | { 48 | "type": "Microsoft.Storage/storageAccounts", 49 | "apiVersion": "2019-04-01", 50 | "name": "[variables('blobStorageAccountName')]", 51 | "location": "[variables('location')]", 52 | "sku": { 53 | "name": "Standard_LRS" 54 | }, 55 | "kind": "StorageV2", 56 | "properties": { 57 | "encryption": { 58 | "services": { 59 | "blob": { 60 | "enabled": true 61 | }, 62 | "file": { 63 | "enabled": true 64 | } 65 | }, 66 | "keySource": "Microsoft.Storage" 67 | }, 68 | "supportsHttpsTrafficOnly": true 69 | } 70 | }, 71 | { 72 | "type": "Microsoft.Storage/storageAccounts/blobServices", 73 | "apiVersion": "2019-06-01", 74 | "name": "[concat(variables('blobStorageAccountName'), '/default')]", 75 | "dependsOn": [ 76 | "[resourceId('Microsoft.Storage/storageAccounts', variables('blobStorageAccountName'))]" 77 | ], 78 | "sku": { 79 | "name": "Standard_LRS", 80 | "tier": "Standard" 81 | }, 82 | "properties": { 83 | "cors": { 84 | "corsRules": [] 85 | }, 86 | "deleteRetentionPolicy": { 87 | "enabled": false 88 | } 89 | } 90 | }, 91 | { 92 | "type": "Microsoft.KeyVault/vaults", 93 | "apiVersion": "2018-02-14", 94 | "name": "[variables('keyVaultName')]", 95 | "location": "[variables('location')]", 96 | "properties": { 97 | "tenantId": "[subscription().tenantId]", 98 | "sku": { 99 | "name": "standard", 100 | "family": "A" 101 | }, 102 | "accessPolicies": [] 103 | } 104 | }, 105 | { 106 | "type": "Microsoft.KeyVault/vaults/secrets", 107 | "name": "[concat(variables('keyVaultName'), '/SqlPassword')]", 108 | "apiVersion": "2018-02-14", 109 | "location": "[variables('location')]", 110 | "dependsOn": [ 111 | "[resourceId('Microsoft.KeyVault/vaults', variables('keyVaultName'))]" 112 | ], 113 | "properties": { 114 | "value": "[parameters('sqlAdministratorLoginPassword')]" 115 | } 116 | }, 117 | { 118 | "type": "Microsoft.Storage/storageAccounts", 119 | "apiVersion": "2019-06-01", 120 | "name": "[variables('adlsStorageAccountName')]", 121 | "location": "[variables('location')]", 122 | "dependsOn": [], 123 | "tags": {}, 124 | "sku": { 125 | "name": "Standard_LRS" 126 | }, 127 | "kind": "StorageV2", 128 | "properties": { 129 | "accessTier": "Hot", 130 | "supportsHttpsTrafficOnly": "true", 131 | "isHnsEnabled": "true", 132 | "largeFileSharesState": "Disabled" 133 | }, 134 | "resources": [ 135 | { 136 | "type": "blobServices/containers", 137 | "apiVersion": "2018-07-01", 138 | "name": "[concat('default/', variables('defaultDataLakeStorageFilesystemName'))]", 139 | "dependsOn": [ 140 | "[concat('Microsoft.Storage/storageAccounts/', variables('adlsStorageAccountName'))]" 141 | ] 142 | } 143 | ] 144 | }, 145 | { 146 | "type": "Microsoft.Storage/storageAccounts/blobServices", 147 | "apiVersion": "2019-06-01", 148 | "name": "[concat(variables('adlsStorageAccountName'), '/default')]", 149 | "dependsOn": [ 150 | "[resourceId('Microsoft.Storage/storageAccounts', variables('adlsStorageAccountName'))]" 151 | ], 152 | "sku": { 153 | "name": "Standard_LRS", 154 | "tier": "Standard" 155 | }, 156 | "properties": { 157 | "cors": { 158 | "corsRules": [] 159 | }, 160 | "deleteRetentionPolicy": { 161 | "enabled": false 162 | } 163 | } 164 | }, 165 | { 166 | "type": "Microsoft.Storage/storageAccounts/fileServices", 167 | "apiVersion": "2019-06-01", 168 | "name": "[concat(variables('adlsStorageAccountName'), '/default')]", 169 | "dependsOn": [ 170 | "[resourceId('Microsoft.Storage/storageAccounts', variables('adlsStorageAccountName'))]" 171 | ], 172 | "sku": { 173 | "name": "Standard_LRS", 174 | "tier": "Standard" 175 | }, 176 | "properties": { 177 | "cors": { 178 | "corsRules": [] 179 | } 180 | } 181 | }, 182 | { 183 | "type": "Microsoft.Synapse/workspaces", 184 | "apiVersion": "2019-06-01-preview", 185 | "name": "[variables('workspaceName')]", 186 | "location": "[variables('location')]", 187 | "dependsOn": [ 188 | "[concat('Microsoft.Storage/storageAccounts/', variables('adlsStorageAccountName'), '/blobServices/default/containers/', variables('defaultDataLakeStorageFilesystemName'))]" 189 | ], 190 | "identity": { 191 | "type": "SystemAssigned" 192 | }, 193 | "properties": { 194 | "managedResourceGroupName": "[concat(resourceGroup().name, '-internal')]", 195 | "defaultDataLakeStorage": { 196 | "accountUrl": "[variables('defaultDataLakeStorageAccountUrl')]", 197 | "filesystem": "[variables('defaultDataLakeStorageFilesystemName')]" 198 | }, 199 | "virtualNetworkProfile": { 200 | "computeSubnetId": "[variables('computeSubnetId')]" 201 | }, 202 | "sqlAdministratorLogin": "[variables('sqlAdministratorLogin')]", 203 | "sqlAdministratorLoginPassword": "[parameters('sqlAdministratorLoginPassword')]" 204 | }, 205 | "resources": [ 206 | { 207 | "type": "firewallrules", 208 | "apiVersion": "2019-06-01-preview", 209 | "name": "allowAll", 210 | "location": "[variables('location')]", 211 | "dependsOn": [ 212 | "[concat('Microsoft.Synapse/workspaces/', variables('workspaceName'))]" 213 | ], 214 | "properties": { 215 | "startIpAddress": "0.0.0.0", 216 | "endIpAddress": "255.255.255.255" 217 | } 218 | } 219 | ] 220 | }, 221 | { 222 | "type": "Microsoft.Storage/storageAccounts/blobServices/containers/providers/roleAssignments", 223 | "apiVersion": "2018-09-01-preview", 224 | "name": "[concat(variables('adlsStorageAccountName'), '/default/', variables('defaultDataLakeStorageFilesystemName'), '/Microsoft.Authorization/', guid(concat(resourceGroup().id, '/', variables('storageBlobDataOwnerRoleID'), '/', variables('workspaceName'))))]", 225 | "location": "[variables('location')]", 226 | "dependsOn": [ 227 | "[concat('Microsoft.Synapse/workspaces/', variables('workspaceName'))]" 228 | ], 229 | "properties": { 230 | "roleDefinitionId": "[resourceId('Microsoft.Authorization/roleDefinitions', variables('storageBlobDataOwnerRoleID'))]", 231 | "principalId": "[reference(concat('Microsoft.Synapse/workspaces/', variables('workspaceName')), '2019-06-01-preview', 'Full').identity.principalId]", 232 | "principalType": "ServicePrincipal" 233 | } 234 | }, 235 | { 236 | "type": "Microsoft.Authorization/roleAssignments", 237 | "apiVersion": "2018-09-01-preview", 238 | "name": "[guid(concat(resourceGroup().id, '/', variables('storageBlobDataOwnerRoleID'), '/', variables('workspaceName'), '2'))]", 239 | "location": "[variables('location')]", 240 | "dependsOn": [ 241 | "[concat('Microsoft.Synapse/workspaces/', variables('workspaceName'))]" 242 | ], 243 | "properties": { 244 | "roleDefinitionId": "[resourceId('Microsoft.Authorization/roleDefinitions', variables('storageBlobDataOwnerRoleID'))]", 245 | "principalId": "[reference(concat('Microsoft.Synapse/workspaces/', variables('workspaceName')), '2019-06-01-preview', 'Full').identity.principalId]", 246 | "principalType": "ServicePrincipal" 247 | } 248 | }, 249 | { 250 | "type": "Microsoft.Synapse/workspaces/sqlPools", 251 | "apiVersion": "2019-06-01-preview", 252 | "name": "[concat(variables('workspaceName'), '/', variables('sqlComputeName'))]", 253 | "location": "[variables('location')]", 254 | "dependsOn": [ 255 | "[concat('Microsoft.Synapse/workspaces/', variables('workspaceName'))]" 256 | ], 257 | "sku": { 258 | "name": "[variables('sqlServerSKU')]" 259 | }, 260 | "properties": { 261 | "createMode": "Default", 262 | "collation": "SQL_Latin1_General_CP1_CI_AS" 263 | } 264 | }, 265 | { 266 | "type": "Microsoft.Insights/components", 267 | "apiVersion": "2018-05-01-preview", 268 | "name": "[variables('applicationInsightsName')]", 269 | "location": "[if(or(equals(variables('location'),'eastus2'),equals(variables('location'),'westcentralus')),'southcentralus',variables('location'))]", 270 | "kind": "web", 271 | "properties": { 272 | "Application_Type": "web" 273 | } 274 | }, 275 | { 276 | "type": "Microsoft.Storage/storageAccounts/blobServices/containers", 277 | "apiVersion": "2019-06-01", 278 | "name": "[concat(variables('adlsStorageAccountName'), '/default/staging')]", 279 | "dependsOn": [ 280 | "[resourceId('Microsoft.Storage/storageAccounts/blobServices', variables('adlsStorageAccountName'), 'default')]", 281 | "[resourceId('Microsoft.Storage/storageAccounts', variables('adlsStorageAccountName'))]" 282 | ], 283 | "properties": { 284 | "publicAccess": "None" 285 | } 286 | }, 287 | { 288 | "type": "Microsoft.Storage/storageAccounts/blobServices/containers", 289 | "apiVersion": "2019-06-01", 290 | "name": "[concat(variables('adlsStorageAccountName'), '/default/wwi-02')]", 291 | "dependsOn": [ 292 | "[resourceId('Microsoft.Storage/storageAccounts/blobServices', variables('adlsStorageAccountName'), 'default')]", 293 | "[resourceId('Microsoft.Storage/storageAccounts', variables('adlsStorageAccountName'))]" 294 | ], 295 | "properties": { 296 | "publicAccess": "None" 297 | } 298 | }, 299 | { 300 | "type": "Microsoft.Storage/storageAccounts/blobServices/containers", 301 | "apiVersion": "2019-06-01", 302 | "name": "[concat(variables('blobStorageAccountName'), '/default/staging')]", 303 | "dependsOn": [ 304 | "[resourceId('Microsoft.Storage/storageAccounts/blobServices', variables('blobStorageAccountName'), 'default')]", 305 | "[resourceId('Microsoft.Storage/storageAccounts', variables('blobStorageAccountName'))]" 306 | ], 307 | "properties": { 308 | "publicAccess": "None" 309 | } 310 | }, 311 | { 312 | "type": "Microsoft.DocumentDB/databaseAccounts", 313 | "apiVersion": "2020-03-01", 314 | "name": "[variables('cosmosDbAccountName')]", 315 | "location": "[variables('location')]", 316 | "dependsOn": [ 317 | "[concat('Microsoft.Synapse/workspaces/', variables('workspaceName'), '/sqlPools/', variables('sqlComputeName'))]", 318 | "[concat('Microsoft.Synapse/workspaces/', variables('workspaceName'))]", 319 | "[concat('Microsoft.Storage/storageAccounts/', variables('adlsStorageAccountName'), '/blobServices/default/containers/', variables('defaultDataLakeStorageFilesystemName'))]" 320 | ], 321 | "tags": { 322 | "defaultExperience": "Core (SQL)", 323 | "hidden-cosmos-mmspecial": "", 324 | "CosmosAccountType": "Non-Production" 325 | }, 326 | "kind": "GlobalDocumentDB", 327 | "properties": { 328 | "publicNetworkAccess": "Enabled", 329 | "enableAutomaticFailover": false, 330 | "enableMultipleWriteLocations": false, 331 | "isVirtualNetworkFilterEnabled": false, 332 | "virtualNetworkRules": [], 333 | "disableKeyBasedMetadataWriteAccess": false, 334 | "databaseAccountOfferType": "Standard", 335 | "consistencyPolicy": { 336 | "defaultConsistencyLevel": "Session", 337 | "maxIntervalInSeconds": 5, 338 | "maxStalenessPrefix": 100 339 | }, 340 | "locations": [ 341 | { 342 | "locationName": "[variables('location')]", 343 | "provisioningState": "Succeeded", 344 | "failoverPriority": 0, 345 | "isZoneRedundant": false 346 | } 347 | ], 348 | "capabilities": [] 349 | } 350 | }, 351 | { 352 | "type": "Microsoft.DocumentDB/databaseAccounts/sqlDatabases", 353 | "apiVersion": "2020-03-01", 354 | "name": "[concat(variables('cosmosDbAccountName'), '/CustomerProfile')]", 355 | "dependsOn": [ 356 | "[resourceId('Microsoft.DocumentDB/databaseAccounts', variables('cosmosDbAccountName'))]" 357 | ], 358 | "properties": { 359 | "resource": { 360 | "id": "CustomerProfile" 361 | }, 362 | "options": {} 363 | } 364 | }, 365 | { 366 | "type": "Microsoft.DocumentDB/databaseAccounts/sqlDatabases/containers", 367 | "apiVersion": "2020-03-01", 368 | "name": "[concat(variables('cosmosDbAccountName'), '/CustomerProfile/OnlineUserProfile01')]", 369 | "dependsOn": [ 370 | "[resourceId('Microsoft.DocumentDB/databaseAccounts/sqlDatabases', variables('cosmosDbAccountName'), 'CustomerProfile')]", 371 | "[resourceId('Microsoft.DocumentDB/databaseAccounts', variables('cosmosDbAccountName'))]" 372 | ], 373 | "properties": { 374 | "resource": { 375 | "id": "OnlineUserProfile01", 376 | "indexingPolicy": { 377 | "indexingMode": "consistent", 378 | "automatic": true, 379 | "includedPaths": [ 380 | { 381 | "path": "/*" 382 | } 383 | ], 384 | "excludedPaths": [ 385 | { 386 | "path": "/\"_etag\"/?" 387 | } 388 | ] 389 | }, 390 | "partitionKey": { 391 | "paths": [ 392 | "/userId" 393 | ], 394 | "kind": "Hash" 395 | }, 396 | "uniqueKeyPolicy": { 397 | "uniqueKeys": [] 398 | }, 399 | "conflictResolutionPolicy": { 400 | "mode": "LastWriterWins", 401 | "conflictResolutionPath": "/_ts" 402 | } 403 | }, 404 | "options": {} 405 | } 406 | }, 407 | { 408 | "type": "Microsoft.DocumentDB/databaseAccounts/sqlDatabases/containers", 409 | "apiVersion": "2020-03-01", 410 | "name": "[concat(variables('cosmosDbAccountName'), '/CustomerProfile/Pipeline')]", 411 | "dependsOn": [ 412 | "[resourceId('Microsoft.DocumentDB/databaseAccounts/sqlDatabases', variables('cosmosDbAccountName'), 'CustomerProfile')]", 413 | "[resourceId('Microsoft.DocumentDB/databaseAccounts', variables('cosmosDbAccountName'))]" 414 | ], 415 | "properties": { 416 | "resource": { 417 | "id": "Pipeline", 418 | "indexingPolicy": { 419 | "indexingMode": "consistent", 420 | "automatic": true, 421 | "includedPaths": [ 422 | { 423 | "path": "/*" 424 | } 425 | ], 426 | "excludedPaths": [ 427 | { 428 | "path": "/\"_etag\"/?" 429 | } 430 | ] 431 | }, 432 | "partitionKey": { 433 | "paths": [ 434 | "/id" 435 | ], 436 | "kind": "Hash" 437 | }, 438 | "uniqueKeyPolicy": { 439 | "uniqueKeys": [] 440 | }, 441 | "conflictResolutionPolicy": { 442 | "mode": "LastWriterWins", 443 | "conflictResolutionPath": "/_ts" 444 | } 445 | }, 446 | "options": {} 447 | } 448 | }, 449 | { 450 | "type": "Microsoft.DocumentDB/databaseAccounts/sqlDatabases/containers/throughputSettings", 451 | "apiVersion": "2020-03-01", 452 | "name": "[concat(variables('cosmosDbAccountName'), '/CustomerProfile/OnlineUserProfile01/default')]", 453 | "dependsOn": [ 454 | "[resourceId('Microsoft.DocumentDB/databaseAccounts/sqlDatabases/containers', variables('cosmosDbAccountName'), 'CustomerProfile', 'OnlineUserProfile01')]", 455 | "[resourceId('Microsoft.DocumentDB/databaseAccounts/sqlDatabases', variables('cosmosDbAccountName'), 'CustomerProfile')]", 456 | "[resourceId('Microsoft.DocumentDB/databaseAccounts', variables('cosmosDbAccountName'))]" 457 | ], 458 | "properties": { 459 | "resource": { 460 | "throughput": 10000 461 | } 462 | } 463 | }, 464 | { 465 | "type": "Microsoft.DocumentDB/databaseAccounts/sqlDatabases/containers/throughputSettings", 466 | "apiVersion": "2020-03-01", 467 | "name": "[concat(variables('cosmosDbAccountName'), '/CustomerProfile/Pipeline/default')]", 468 | "dependsOn": [ 469 | "[resourceId('Microsoft.DocumentDB/databaseAccounts/sqlDatabases/containers', variables('cosmosDbAccountName'), 'CustomerProfile', 'Pipeline')]", 470 | "[resourceId('Microsoft.DocumentDB/databaseAccounts/sqlDatabases', variables('cosmosDbAccountName'), 'CustomerProfile')]", 471 | "[resourceId('Microsoft.DocumentDB/databaseAccounts', variables('cosmosDbAccountName'))]" 472 | ], 473 | "properties": { 474 | "resource": { 475 | "throughput": 400 476 | } 477 | } 478 | } 479 | ], 480 | "outputs": {} 481 | } 482 | -------------------------------------------------------------------------------- /Synapse/setup/automation/01-environment-setup.ps1: -------------------------------------------------------------------------------- 1 | $InformationPreference = "Continue" 2 | $IsCloudLabs = Test-Path C:\LabFiles\AzureCreds.ps1; 3 | 4 | if($IsCloudLabs){ 5 | if(Get-Module -Name solliance-synapse-automation){ 6 | Remove-Module solliance-synapse-automation 7 | } 8 | Import-Module "..\solliance-synapse-automation" 9 | Initialize-ExecutionTiming 10 | 11 | . C:\LabFiles\AzureCreds.ps1 12 | 13 | $userName = $AzureUserName # READ FROM FILE 14 | $password = $AzurePassword # READ FROM FILE 15 | $clientId = $TokenGeneratorClientId # READ FROM FILE 16 | #$global:sqlPassword = $AzureSQLPassword # READ FROM FILE 17 | 18 | $securePassword = $password | ConvertTo-SecureString -AsPlainText -Force 19 | $cred = new-object -typename System.Management.Automation.PSCredential -argumentlist $userName, $SecurePassword 20 | 21 | Connect-AzAccount -Credential $cred | Out-Null 22 | 23 | $resourceGroupName = (Get-AzResourceGroup | Where-Object { $_.ResourceGroupName -like "*-L400*" }).ResourceGroupName 24 | 25 | if ($resourceGroupName.Count -gt 1) 26 | { 27 | $resourceGroupName = $resourceGroupName[0]; 28 | } 29 | 30 | $ropcBodyCore = "client_id=$($clientId)&username=$($userName)&password=$($password)&grant_type=password" 31 | $global:ropcBodySynapse = "$($ropcBodyCore)&scope=https://dev.azuresynapse.net/.default" 32 | $global:ropcBodyManagement = "$($ropcBodyCore)&scope=https://management.azure.com/.default" 33 | $global:ropcBodySynapseSQL = "$($ropcBodyCore)&scope=https://sql.azuresynapse.net/.default" 34 | $global:ropcBodyPowerBI = "$($ropcBodyCore)&scope=https://analysis.windows.net/powerbi/api/.default" 35 | 36 | $artifactsPath = "..\..\" 37 | $reportsPath = "..\reports" 38 | $templatesPath = "..\templates" 39 | $datasetsPath = "..\datasets" 40 | $dataflowsPath = "..\dataflows" 41 | $pipelinesPath = "..\pipelines" 42 | $sqlScriptsPath = "..\sql" 43 | } else { 44 | if(Get-Module -Name solliance-synapse-automation){ 45 | Remove-Module solliance-synapse-automation 46 | } 47 | Import-Module "..\solliance-synapse-automation" 48 | Initialize-ExecutionTiming 49 | 50 | #Different approach to run automation in Cloud Shell 51 | $subs = Get-AzSubscription | Select-Object -ExpandProperty Name 52 | if($subs.GetType().IsArray -and $subs.length -gt 1){ 53 | $subOptions = [System.Collections.ArrayList]::new() 54 | for($subIdx=0; $subIdx -lt $subs.length; $subIdx++){ 55 | $opt = New-Object System.Management.Automation.Host.ChoiceDescription "$($subs[$subIdx])", "Selects the $($subs[$subIdx]) subscription." 56 | $subOptions.Add($opt) 57 | } 58 | $selectedSubIdx = $host.ui.PromptForChoice('Enter the desired Azure Subscription for this lab','Copy and paste the name of the subscription to make your choice.', $subOptions.ToArray(),0) 59 | $selectedSubName = $subs[$selectedSubIdx] 60 | Write-Information "Selecting the $selectedSubName subscription" 61 | Select-AzSubscription -SubscriptionName $selectedSubName 62 | } 63 | 64 | $resourceGroupName = Read-Host "Enter the resource group name"; 65 | 66 | $userName = ((az ad signed-in-user show) | ConvertFrom-JSON).UserPrincipalName 67 | 68 | #$global:sqlPassword = Read-Host -Prompt "Enter the SQL Administrator password you used in the deployment" -AsSecureString 69 | #$global:sqlPassword = [System.Runtime.InteropServices.Marshal]::PtrToStringUni([System.Runtime.InteropServices.Marshal]::SecureStringToCoTaskMemUnicode($sqlPassword)) 70 | 71 | $artifactsPath = "..\..\" 72 | $reportsPath = "..\reports" 73 | $templatesPath = "..\templates" 74 | $datasetsPath = "..\datasets" 75 | $dataflowsPath = "..\dataflows" 76 | $pipelinesPath = "..\pipelines" 77 | $sqlScriptsPath = "..\sql" 78 | } 79 | 80 | Write-Information "Using $resourceGroupName"; 81 | 82 | $uniqueId = (Get-AzResourceGroup -Name $resourceGroupName).Tags["DeploymentId"] 83 | $subscriptionId = (Get-AzContext).Subscription.Id 84 | $tenantId = (Get-AzContext).Tenant.Id 85 | $global:logindomain = (Get-AzContext).Tenant.Id; 86 | 87 | $workspaceName = "asaworkspace$($uniqueId)" 88 | $cosmosDbAccountName = "asacosmosdb$($uniqueId)" 89 | $cosmosDbDatabase = "CustomerProfile" 90 | $cosmosDbContainer = "OnlineUserProfile01" 91 | $dataLakeAccountName = "asadatalake$($uniqueId)" 92 | $blobStorageAccountName = "asastore$($uniqueId)" 93 | $keyVaultName = "asakeyvault$($uniqueId)" 94 | $keyVaultSQLUserSecretName = "SQL-USER-ASA" 95 | $sqlPoolName = "SQLPool01" 96 | $integrationRuntimeName = "AzureIntegrationRuntime01" 97 | $sparkPoolName = "SparkPool01" 98 | $amlWorkspaceName = "amlworkspace$($uniqueId)" 99 | $global:sqlEndpoint = "$($workspaceName).sql.azuresynapse.net" 100 | $global:sqlUser = "asa.sql.admin" 101 | 102 | $global:synapseToken = "" 103 | $global:synapseSQLToken = "" 104 | $global:managementToken = "" 105 | $global:powerbiToken = ""; 106 | 107 | $global:tokenTimes = [ordered]@{ 108 | Synapse = (Get-Date -Year 1) 109 | SynapseSQL = (Get-Date -Year 1) 110 | Management = (Get-Date -Year 1) 111 | PowerBI = (Get-Date -Year 1) 112 | } 113 | 114 | # Write-Information "Assign Ownership to L400 Proctors on Synapse Workspace" 115 | # Assign-SynapseRole -WorkspaceName $workspaceName -RoleId "6e4bf58a-b8e1-4cc3-bbf9-d73143322b78" -PrincipalId "37548b2e-e5ab-4d2b-b0da-4d812f56c30e" # Workspace Admin 116 | # Assign-SynapseRole -WorkspaceName $workspaceName -RoleId "7af0c69a-a548-47d6-aea3-d00e69bd83aa" -PrincipalId "37548b2e-e5ab-4d2b-b0da-4d812f56c30e" # SQL Admin 117 | # Assign-SynapseRole -WorkspaceName $workspaceName -RoleId "c3a6d2f1-a26f-4810-9b0f-591308d5cbf1" -PrincipalId "37548b2e-e5ab-4d2b-b0da-4d812f56c30e" # Apache Spark Admin 118 | 119 | #add the current user... 120 | $user = Get-AzADUser -UserPrincipalName $userName 121 | # Assign-SynapseRole -WorkspaceName $workspaceName -RoleId "6e4bf58a-b8e1-4cc3-bbf9-d73143322b78" -PrincipalId $user.id # Workspace Admin 122 | # Assign-SynapseRole -WorkspaceName $workspaceName -RoleId "7af0c69a-a548-47d6-aea3-d00e69bd83aa" -PrincipalId $user.id # SQL Admin 123 | # Assign-SynapseRole -WorkspaceName $workspaceName -RoleId "c3a6d2f1-a26f-4810-9b0f-591308d5cbf1" -PrincipalId $user.id # Apache Spark Admin 124 | 125 | # #Set the Azure AD Admin - otherwise it will bail later 126 | # Set-SqlAdministrator $username $user.id; 127 | 128 | #add the permission to the datalake to workspace 129 | $id = (Get-AzADServicePrincipal -DisplayName $workspacename).id 130 | New-AzRoleAssignment -Objectid $id -RoleDefinitionName "Storage Blob Data Owner" -Scope "/subscriptions/$subscriptionId/resourceGroups/$resourceGroupName/providers/Microsoft.Storage/storageAccounts/$dataLakeAccountName" -ErrorAction SilentlyContinue; 131 | New-AzRoleAssignment -SignInName $username -RoleDefinitionName "Storage Blob Data Owner" -Scope "/subscriptions/$subscriptionId/resourceGroups/$resourceGroupName/providers/Microsoft.Storage/storageAccounts/$dataLakeAccountName" -ErrorAction SilentlyContinue; 132 | 133 | Write-Information "Setting Key Vault Access Policy" 134 | Set-AzKeyVaultAccessPolicy -ResourceGroupName $resourceGroupName -VaultName $keyVaultName -UserPrincipalName $userName -PermissionsToSecrets set,delete,get,list 135 | Set-AzKeyVaultAccessPolicy -ResourceGroupName $resourceGroupName -VaultName $keyVaultName -ObjectId $id -PermissionsToSecrets set,delete,get,list 136 | 137 | #remove need to ask for the password in script. 138 | $global:sqlPassword = $(Get-AzKeyVaultSecret -VaultName $keyVaultName -Name "SqlPassword" -AsPlainText) 139 | 140 | Write-Information "Create SQL-USER-ASA Key Vault Secret" 141 | $secretValue = ConvertTo-SecureString $sqlPassword -AsPlainText -Force 142 | Set-AzKeyVaultSecret -VaultName $keyVaultName -Name $keyVaultSQLUserSecretName -SecretValue $secretValue 143 | 144 | Write-Information "Create KeyVault linked service $($keyVaultName)" 145 | 146 | $result = Create-KeyVaultLinkedService -TemplatesPath $templatesPath -WorkspaceName $workspaceName -Name $keyVaultName 147 | Wait-ForOperation -WorkspaceName $workspaceName -OperationId $result.operationId 148 | 149 | Write-Information "Create Integration Runtime $($integrationRuntimeName)" 150 | 151 | $result = Create-IntegrationRuntime -TemplatesPath $templatesPath -SubscriptionId $subscriptionId -ResourceGroupName $resourceGroupName -WorkspaceName $workspaceName -Name $integrationRuntimeName -CoreCount 16 -TimeToLive 60 152 | Wait-ForOperation -WorkspaceName $workspaceName -OperationId $result.operationId 153 | 154 | Write-Information "Create Data Lake linked service $($dataLakeAccountName)" 155 | 156 | $dataLakeAccountKey = List-StorageAccountKeys -SubscriptionId $subscriptionId -ResourceGroupName $resourceGroupName -Name $dataLakeAccountName 157 | $result = Create-DataLakeLinkedService -TemplatesPath $templatesPath -WorkspaceName $workspaceName -Name $dataLakeAccountName -Key $dataLakeAccountKey 158 | Wait-ForOperation -WorkspaceName $workspaceName -OperationId $result.operationId 159 | 160 | Write-Information "Create Blob Storage linked service $($blobStorageAccountName)" 161 | 162 | $blobStorageAccountKey = List-StorageAccountKeys -SubscriptionId $subscriptionId -ResourceGroupName $resourceGroupName -Name $blobStorageAccountName 163 | $result = Create-BlobStorageLinkedService -TemplatesPath $templatesPath -WorkspaceName $workspaceName -Name $blobStorageAccountName -Key $blobStorageAccountKey 164 | Wait-ForOperation -WorkspaceName $workspaceName -OperationId $result.operationId 165 | 166 | Get-ExecutionTiming 167 | 168 | Write-Information "Copy Public Data" 169 | 170 | Ensure-ValidTokens 171 | 172 | if ([System.Environment]::OSVersion.Platform -eq "Unix") 173 | { 174 | $azCopyLink = Check-HttpRedirect "https://aka.ms/downloadazcopy-v10-linux" 175 | 176 | if (!$azCopyLink) 177 | { 178 | $azCopyLink = "https://azcopyvnext.azureedge.net/release20200709/azcopy_linux_amd64_10.5.0.tar.gz" 179 | } 180 | 181 | Invoke-WebRequest $azCopyLink -OutFile "azCopy.tar.gz" 182 | tar -xf "azCopy.tar.gz" 183 | $azCopyCommand = (Get-ChildItem -Path ".\" -Recurse azcopy).Directory.FullName 184 | cd $azCopyCommand 185 | chmod +x azcopy 186 | cd .. 187 | $azCopyCommand += "\azcopy" 188 | } 189 | else 190 | { 191 | $azCopyLink = Check-HttpRedirect "https://aka.ms/downloadazcopy-v10-windows" 192 | 193 | if (!$azCopyLink) 194 | { 195 | $azCopyLink = "https://azcopyvnext.azureedge.net/release20200501/azcopy_windows_amd64_10.4.3.zip" 196 | } 197 | 198 | Invoke-WebRequest $azCopyLink -OutFile "azCopy.zip" 199 | Expand-Archive "azCopy.zip" -DestinationPath ".\" -Force 200 | $azCopyCommand = (Get-ChildItem -Path ".\" -Recurse azcopy.exe).Directory.FullName 201 | $azCopyCommand += "\azcopy" 202 | } 203 | 204 | #$jobs = $(azcopy jobs list) 205 | 206 | $download = $true; 207 | 208 | $publicDataUrl = "https://solliancepublicdata.blob.core.windows.net/" 209 | $dataLakeStorageUrl = "https://"+ $dataLakeAccountName + ".dfs.core.windows.net/" 210 | $dataLakeStorageBlobUrl = "https://"+ $dataLakeAccountName + ".blob.core.windows.net/" 211 | $dataLakeStorageAccountKey = (Get-AzStorageAccountKey -ResourceGroupName $resourceGroupName -AccountName $dataLakeAccountName)[0].Value 212 | $dataLakeContext = New-AzStorageContext -StorageAccountName $dataLakeAccountName -StorageAccountKey $dataLakeStorageAccountKey 213 | $destinationSasKey = New-AzStorageContainerSASToken -Container "wwi-02" -Context $dataLakeContext -Permission rwdl 214 | 215 | if ($download) 216 | { 217 | Write-Information "Copying single files from the public data account..." 218 | $singleFiles = @{ 219 | customer_info = "wwi-02/customer-info/customerinfo.csv" 220 | products = "wwi-02/data-generators/generator-product/generator-product.csv" 221 | dates = "wwi-02/data-generators/generator-date.csv" 222 | customer = "wwi-02/data-generators/generator-customer.csv" 223 | onnx = "wwi-02/ml/onnx-hex/product_seasonality_classifier.onnx.hex" 224 | } 225 | 226 | foreach ($singleFile in $singleFiles.Keys) { 227 | $source = $publicDataUrl + $singleFiles[$singleFile] 228 | $destination = $dataLakeStorageBlobUrl + $singleFiles[$singleFile] + $destinationSasKey 229 | Write-Information "Copying file $($source) to $($destination)" 230 | & $azCopyCommand copy $source $destination 231 | Get-ExecutionTiming 232 | } 233 | 234 | Write-Information "Copying sample sales raw data directories from the public data account..." 235 | 236 | $dataDirectories = @{ 237 | # salesmall_2014 = "wwi-02,wwi-02/sale-small/Year=2014" 238 | # salesmall_2015 = "wwi-02,wwi-02/sale-small/Year=2015" 239 | # salesmall_2016 = "wwi-02,wwi-02/sale-small/Year=2016" 240 | # salesmall_2017 = "wwi-02,wwi-02/sale-small/Year=2017" 241 | # salesmall_2018 = "wwi-02,wwi-02/sale-small/Year=2018" 242 | salesmall_2019 = "wwi-02/sale-small,wwi-02/sale-small/Year=2019" 243 | analytics = "wwi-02,wwi-02/campaign-analytics/" 244 | factsale = "wwi-02,wwi-02/sale-csv/" 245 | # security = "wwi-02,wwi-02-reduced/security/" 246 | # salespoc = "wwi-02,wwi-02/sale-poc/" 247 | } 248 | 249 | foreach ($dataDirectory in $dataDirectories.Keys) { 250 | 251 | $vals = $dataDirectories[$dataDirectory].tostring().split(","); 252 | 253 | $source = $publicDataUrl + $vals[1]; 254 | 255 | $path = $vals[0]; 256 | 257 | $destination = $dataLakeStorageBlobUrl + $path + $destinationSasKey 258 | Write-Information "Copying directory $($source) to $($destination)" 259 | & $azCopyCommand copy $source $destination --recursive=true 260 | Get-ExecutionTiming 261 | } 262 | } 263 | 264 | Write-Information "Start the $($sqlPoolName) SQL pool if needed." 265 | 266 | $result = Get-SQLPool -SubscriptionId $subscriptionId -ResourceGroupName $resourceGroupName -WorkspaceName $workspaceName -SQLPoolName $sqlPoolName 267 | if ($result.properties.status -ne "Online") { 268 | Control-SQLPool -SubscriptionId $subscriptionId -ResourceGroupName $resourceGroupName -WorkspaceName $workspaceName -SQLPoolName $sqlPoolName -Action resume 269 | Wait-ForSQLPool -SubscriptionId $subscriptionId -ResourceGroupName $resourceGroupName -WorkspaceName $workspaceName -SQLPoolName $sqlPoolName -TargetStatus Online 270 | } 271 | 272 | #Write-Information "Scale up the $($sqlPoolName) SQL pool to DW3000c to prepare for baby MOADs import." 273 | 274 | #Control-SQLPool -SubscriptionId $subscriptionId -ResourceGroupName $resourceGroupName -WorkspaceName $workspaceName -SQLPoolName $sqlPoolName -Action scale -SKU DW3000c 275 | #Wait-ForSQLPool -SubscriptionId $subscriptionId -ResourceGroupName $resourceGroupName -WorkspaceName $workspaceName -SQLPoolName $sqlPoolName -TargetStatus Online 276 | 277 | Write-Information "Create SQL logins in master SQL pool" 278 | 279 | $params = @{ PASSWORD = $sqlPassword } 280 | $result = Execute-SQLScriptFile -SQLScriptsPath $sqlScriptsPath -WorkspaceName $workspaceName -SQLPoolName "master" -FileName "01-create-logins" -Parameters $params 281 | $result 282 | 283 | Write-Information "Create SQL users and role assignments in $($sqlPoolName)" 284 | 285 | $params = @{ USER_NAME = $userName } 286 | $result = Execute-SQLScriptFile -SQLScriptsPath $sqlScriptsPath -WorkspaceName $workspaceName -SQLPoolName $sqlPoolName -FileName "02-create-users" -Parameters $params 287 | $result 288 | 289 | Write-Information "Create schemas in $($sqlPoolName)" 290 | 291 | $params = @{} 292 | $result = Execute-SQLScriptFile -SQLScriptsPath $sqlScriptsPath -WorkspaceName $workspaceName -SQLPoolName $sqlPoolName -FileName "03-create-schemas" -Parameters $params 293 | $result 294 | 295 | Write-Information "Create tables in the [wwi] schema in $($sqlPoolName)" 296 | 297 | $params = @{} 298 | $result = Execute-SQLScriptFile -SQLScriptsPath $sqlScriptsPath -WorkspaceName $workspaceName -SQLPoolName $sqlPoolName -FileName "04-create-tables-in-wwi-schema" -Parameters $params 299 | $result 300 | 301 | 302 | Write-Information "Create tables in the [wwi_ml] schema in $($sqlPoolName)" 303 | 304 | $dataLakeAccountKey = List-StorageAccountKeys -SubscriptionId $subscriptionId -ResourceGroupName $resourceGroupName -Name $dataLakeAccountName 305 | $params = @{ 306 | DATA_LAKE_ACCOUNT_NAME = $dataLakeAccountName 307 | DATA_LAKE_ACCOUNT_KEY = $dataLakeAccountKey 308 | } 309 | $result = Execute-SQLScriptFile -SQLScriptsPath $sqlScriptsPath -WorkspaceName $workspaceName -SQLPoolName $sqlPoolName -FileName "05-create-tables-in-wwi-ml-schema" -Parameters $params 310 | $result 311 | 312 | Write-Information "Create tables in the [wwi_security] schema in $($sqlPoolName)" 313 | 314 | $params = @{ 315 | DATA_LAKE_ACCOUNT_NAME = $dataLakeAccountName 316 | DATA_LAKE_ACCOUNT_KEY = $dataLakeAccountKey 317 | } 318 | $result = Execute-SQLScriptFile -SQLScriptsPath $sqlScriptsPath -WorkspaceName $workspaceName -SQLPoolName $sqlPoolName -FileName "06-create-tables-in-wwi-security-schema" -Parameters $params 319 | $result 320 | 321 | Get-ExecutionTiming 322 | 323 | Write-Information "Create linked service for SQL pool $($sqlPoolName) with user asa.sql.admin" 324 | 325 | $linkedServiceName = $sqlPoolName.ToLower() 326 | $result = Create-SQLPoolKeyVaultLinkedService -TemplatesPath $templatesPath -WorkspaceName $workspaceName -Name $linkedServiceName -DatabaseName $sqlPoolName ` 327 | -UserName "asa.sql.admin" -KeyVaultLinkedServiceName $keyVaultName -SecretName $keyVaultSQLUserSecretName 328 | Wait-ForOperation -WorkspaceName $workspaceName -OperationId $result.operationId 329 | 330 | Write-Information "Create linked service for SQL pool $($sqlPoolName) with user asa.sql.highperf" 331 | 332 | $linkedServiceName = "$($sqlPoolName.ToLower())_highperf" 333 | $result = Create-SQLPoolKeyVaultLinkedService -TemplatesPath $templatesPath -WorkspaceName $workspaceName -Name $linkedServiceName -DatabaseName $sqlPoolName ` 334 | -UserName "asa.sql.highperf" -KeyVaultLinkedServiceName $keyVaultName -SecretName $keyVaultSQLUserSecretName 335 | Wait-ForOperation -WorkspaceName $workspaceName -OperationId $result.operationId 336 | 337 | <# Day 1-3#> 338 | 339 | Write-Information "Create data sets for data load in SQL pool $($sqlPoolName)" 340 | 341 | $loadingDatasets = @{ 342 | wwi02_date_adls = $dataLakeAccountName 343 | wwi02_product_adls = $dataLakeAccountName 344 | wwi02_sale_small_adls = $dataLakeAccountName 345 | wwi02_date_asa = $sqlPoolName.ToLower() 346 | wwi02_product_asa = $sqlPoolName.ToLower() 347 | wwi02_sale_small_asa = "$($sqlPoolName.ToLower())_highperf" 348 | } 349 | 350 | foreach ($dataset in $loadingDatasets.Keys) { 351 | Write-Information "Creating dataset $($dataset)" 352 | $result = Create-Dataset -DatasetsPath $datasetsPath -WorkspaceName $workspaceName -Name $dataset -LinkedServiceName $loadingDatasets[$dataset] 353 | Wait-ForOperation -WorkspaceName $workspaceName -OperationId $result.operationId 354 | } 355 | 356 | Write-Information "Create pipeline to load the SQL pool" 357 | 358 | $params = @{ 359 | BLOB_STORAGE_LINKED_SERVICE_NAME = $blobStorageAccountName 360 | } 361 | $loadingPipelineName = "Setup - Load SQL Pool (global)" 362 | $fileName = "load_sql_pool_from_data_lake" 363 | 364 | Write-Information "Creating pipeline $($loadingPipelineName)" 365 | 366 | $result = Create-Pipeline -PipelinesPath $pipelinesPath -WorkspaceName $workspaceName -Name $loadingPipelineName -FileName $fileName -Parameters $params 367 | Wait-ForOperation -WorkspaceName $workspaceName -OperationId $result.operationId 368 | 369 | Get-ExecutionTiming 370 | 371 | Write-Information "Running pipeline $($loadingPipelineName)" 372 | 373 | $result = Run-Pipeline -WorkspaceName $workspaceName -Name $loadingPipelineName 374 | $result = Wait-ForPipelineRun -WorkspaceName $workspaceName -RunId $result.runId 375 | $result 376 | 377 | Ensure-ValidTokens 378 | 379 | Get-ExecutionTiming 380 | 381 | Write-Information "Deleting pipeline $($loadingPipelineName)" 382 | 383 | $result = Delete-ASAObject -WorkspaceName $workspaceName -Category "pipelines" -Name $loadingPipelineName 384 | Wait-ForOperation -WorkspaceName $workspaceName -OperationId $result.operationId 385 | 386 | foreach ($dataset in $loadingDatasets.Keys) { 387 | Write-Information "Deleting dataset $($dataset)" 388 | $result = Delete-ASAObject -WorkspaceName $workspaceName -Category "datasets" -Name $dataset 389 | Wait-ForOperation -WorkspaceName $workspaceName -OperationId $result.operationId 390 | } 391 | 392 | Get-ExecutionTiming 393 | 394 | Write-Information "Create tables in wwi_perf schema in SQL pool $($sqlPoolName)" 395 | 396 | $params = @{} 397 | $scripts = [ordered]@{ 398 | "07-create-wwi-perf-sale-heap" = "CTAS : Sale_Heap" 399 | "08-create-wwi-perf-sale-partition01" = "CTAS : Sale_Partition01" 400 | "09-create-wwi-perf-sale-partition02" = "CTAS : Sale_Partition02" 401 | "10-create-wwi-perf-sale-index" = "CTAS : Sale_Index" 402 | "11-create-wwi-perf-sale-hash-ordered" = "CTAS : Sale_Hash_Ordered" 403 | } 404 | 405 | foreach ($script in $scripts.Keys) { 406 | 407 | $refTime = (Get-Date).ToUniversalTime() 408 | Write-Information "Starting $($script) with label $($scripts[$script])" 409 | 410 | # initiate the script and wait until it finishes 411 | Execute-SQLScriptFile -SQLScriptsPath $sqlScriptsPath -WorkspaceName $workspaceName -SQLPoolName $sqlPoolName -FileName $script -ForceReturn $true 412 | #Wait-ForSQLQuery -WorkspaceName $workspaceName -SQLPoolName $sqlPoolName -Label $scripts[$script] -ReferenceTime $refTime 413 | 414 | Get-ExecutionTiming 415 | } 416 | 417 | #Write-Information "Scale down the $($sqlPoolName) SQL pool to DW500c after baby MOADs import." 418 | 419 | #Control-SQLPool -SubscriptionId $subscriptionId -ResourceGroupName $resourceGroupName -WorkspaceName $workspaceName -SQLPoolName $sqlPoolName -Action scale -SKU DW500c 420 | #Wait-ForSQLPool -SubscriptionId $subscriptionId -ResourceGroupName $resourceGroupName -WorkspaceName $workspaceName -SQLPoolName $sqlPoolName -TargetStatus Online 421 | 422 | Write-Information "Create linked service for SQL pool $($sqlPoolName) with user asa.sql.import01" 423 | 424 | $linkedServiceName = "$($sqlPoolName.ToLower())_import01" 425 | $result = Create-SQLPoolKeyVaultLinkedService -TemplatesPath $templatesPath -WorkspaceName $workspaceName -Name $linkedServiceName -DatabaseName $sqlPoolName ` 426 | -UserName "asa.sql.import01" -KeyVaultLinkedServiceName $keyVaultName -SecretName $keyVaultSQLUserSecretName 427 | Wait-ForOperation -WorkspaceName $workspaceName -OperationId $result.operationId 428 | 429 | Write-Information "Create linked service for SQL pool $($sqlPoolName) with user asa.sql.workload01" 430 | 431 | $linkedServiceName = "$($sqlPoolName.ToLower())_workload01" 432 | $result = Create-SQLPoolKeyVaultLinkedService -TemplatesPath $templatesPath -WorkspaceName $workspaceName -Name $linkedServiceName -DatabaseName $sqlPoolName ` 433 | -UserName "asa.sql.workload01" -KeyVaultLinkedServiceName $keyVaultName -SecretName $keyVaultSQLUserSecretName 434 | Wait-ForOperation -WorkspaceName $workspaceName -OperationId $result.operationId 435 | 436 | Write-Information "Create linked service for SQL pool $($sqlPoolName) with user asa.sql.workload02" 437 | 438 | $linkedServiceName = "$($sqlPoolName.ToLower())_workload02" 439 | $result = Create-SQLPoolKeyVaultLinkedService -TemplatesPath $templatesPath -WorkspaceName $workspaceName -Name $linkedServiceName -DatabaseName $sqlPoolName ` 440 | -UserName "asa.sql.workload02" -KeyVaultLinkedServiceName $keyVaultName -SecretName $keyVaultSQLUserSecretName 441 | Wait-ForOperation -WorkspaceName $workspaceName -OperationId $result.operationId 442 | 443 | Get-ExecutionTiming 444 | 445 | # 446 | # =============== COSMOS DB IMPORT - MUST REMAIN LAST IN SCRIPT !!! ==================== 447 | # 448 | 449 | $download = $true; 450 | 451 | #generate new one just in case... 452 | $destinationSasKey = New-AzStorageContainerSASToken -Container "wwi-02" -Context $dataLakeContext -Permission rwdl 453 | 454 | if ($download) 455 | { 456 | Write-Information "Copying sample sales raw data directories from the public data account..." 457 | 458 | $dataDirectories = @{ 459 | profile01 = "wwi-02,wwi-02/online-user-profiles-01/" 460 | profile02 = "wwi-02,wwi-02/online-user-profiles-02/" 461 | } 462 | 463 | foreach ($dataDirectory in $dataDirectories.Keys) { 464 | 465 | $vals = $dataDirectories[$dataDirectory].tostring().split(","); 466 | 467 | $source = $publicDataUrl + $vals[1]; 468 | 469 | $path = $vals[0]; 470 | 471 | $destination = $dataLakeStorageBlobUrl + $path + $destinationSasKey 472 | Write-Information "Copying directory $($source) to $($destination)" 473 | & $azCopyCommand copy $source $destination --recursive=true 474 | 475 | Get-ExecutionTiming 476 | } 477 | } 478 | 479 | Write-Information "Counting Cosmos DB item in database $($cosmosDbDatabase), container $($cosmosDbContainer)" 480 | $documentCount = Count-CosmosDbDocuments -SubscriptionId $subscriptionId -ResourceGroupName $resourceGroupName -CosmosDbAccountName $cosmosDbAccountName ` 481 | -CosmosDbDatabase $cosmosDbDatabase -CosmosDbContainer $cosmosDbContainer 482 | 483 | Write-Information "Found $documentCount in Cosmos DB container $($cosmosDbContainer)" 484 | 485 | Get-ExecutionTiming 486 | 487 | Install-Module -Name Az.CosmosDB 488 | 489 | if ($documentCount -ne 100000) 490 | { 491 | # Increase RUs in CosmosDB container 492 | Write-Information "Increase Cosmos DB container $($cosmosDbContainer) to 10000 RUs" 493 | 494 | $container = Get-AzCosmosDBSqlContainer ` 495 | -ResourceGroupName $resourceGroupName ` 496 | -AccountName $cosmosDbAccountName -DatabaseName $cosmosDbDatabase ` 497 | -Name $cosmosDbContainer 498 | 499 | Update-AzCosmosDBSqlContainer -ResourceGroupName $resourceGroupName ` 500 | -AccountName $cosmosDbAccountName -DatabaseName $cosmosDbDatabase ` 501 | -Name $cosmosDbContainer -Throughput 10000 ` 502 | -PartitionKeyKind $container.Resource.PartitionKey.Kind ` 503 | -PartitionKeyPath $container.Resource.PartitionKey.Paths 504 | 505 | $name = "wwi02_online_user_profiles_01_adal" 506 | Write-Information "Create dataset $($name)" 507 | $result = Create-Dataset -DatasetsPath $datasetsPath -WorkspaceName $workspaceName -Name $name -LinkedServiceName $dataLakeAccountName 508 | Wait-ForOperation -WorkspaceName $workspaceName -OperationId $result.operationId 509 | 510 | Write-Information "Create Cosmos DB linked service $($cosmosDbAccountName)" 511 | $cosmosDbAccountKey = List-CosmosDBKeys -SubscriptionId $subscriptionId -ResourceGroupName $resourceGroupName -Name $cosmosDbAccountName 512 | $result = Create-CosmosDBLinkedService -TemplatesPath $templatesPath -WorkspaceName $workspaceName -Name $cosmosDbAccountName -Database $cosmosDbDatabase -Key $cosmosDbAccountKey 513 | Wait-ForOperation -WorkspaceName $workspaceName -OperationId $result.operationId 514 | 515 | $name = "customer_profile_cosmosdb" 516 | Write-Information "Create dataset $($name)" 517 | $result = Create-Dataset -DatasetsPath $datasetsPath -WorkspaceName $workspaceName -Name $name -LinkedServiceName $cosmosDbAccountName 518 | Wait-ForOperation -WorkspaceName $workspaceName -OperationId $result.operationId 519 | 520 | $name = "Setup - Import User Profile Data into Cosmos DB" 521 | $fileName = "import_customer_profiles_into_cosmosdb" 522 | Write-Information "Create pipeline $($name)" 523 | $result = Create-Pipeline -PipelinesPath $pipelinesPath -WorkspaceName $workspaceName -Name $name -FileName $fileName 524 | Wait-ForOperation -WorkspaceName $workspaceName -OperationId $result.operationId 525 | 526 | Get-ExecutionTiming 527 | 528 | Write-Information "Running pipeline $($name)" 529 | $pipelineRunResult = Run-Pipeline -WorkspaceName $workspaceName -Name $name 530 | $result = Wait-ForPipelineRun -WorkspaceName $workspaceName -RunId $pipelineRunResult.runId 531 | $result 532 | 533 | # 534 | # =============== WAIT HERE FOR PIPELINE TO FINISH - MIGHT TAKE ~45 MINUTES ==================== 535 | # 536 | # COPY 100000 records to CosmosDB ==> SELECT VALUE COUNT(1) FROM C 537 | # 538 | 539 | Get-ExecutionTiming 540 | 541 | $name = "Setup - Import User Profile Data into Cosmos DB" 542 | Write-Information "Delete pipeline $($name)" 543 | $result = Delete-ASAObject -WorkspaceName $workspaceName -Category "pipelines" -Name $name 544 | Wait-ForOperation -WorkspaceName $workspaceName -OperationId $result.operationId 545 | 546 | $name = "customer_profile_cosmosdb" 547 | Write-Information "Delete dataset $($name)" 548 | $result = Delete-ASAObject -WorkspaceName $workspaceName -Category "datasets" -Name $name 549 | Wait-ForOperation -WorkspaceName $workspaceName -OperationId $result.operationId 550 | 551 | $name = "wwi02_online_user_profiles_01_adal" 552 | Write-Information "Delete dataset $($name)" 553 | $result = Delete-ASAObject -WorkspaceName $workspaceName -Category "datasets" -Name $name 554 | Wait-ForOperation -WorkspaceName $workspaceName -OperationId $result.operationId 555 | 556 | $name = $cosmosDbAccountName 557 | Write-Information "Delete linked service $($name)" 558 | $result = Delete-ASAObject -WorkspaceName $workspaceName -Category "linkedServices" -Name $name 559 | Wait-ForOperation -WorkspaceName $workspaceName -OperationId $result.operationId 560 | } 561 | 562 | $container = Get-AzCosmosDBSqlContainer ` 563 | -ResourceGroupName $resourceGroupName ` 564 | -AccountName $cosmosDbAccountName -DatabaseName $cosmosDbDatabase ` 565 | -Name $cosmosDbContainer 566 | 567 | Update-AzCosmosDBSqlContainer -ResourceGroupName $resourceGroupName ` 568 | -AccountName $cosmosDbAccountName -DatabaseName $cosmosDbDatabase ` 569 | -Name $cosmosDbContainer -Throughput 400 ` 570 | -PartitionKeyKind $container.Resource.PartitionKey.Kind ` 571 | -PartitionKeyPath $container.Resource.PartitionKey.Paths 572 | 573 | Get-ExecutionTiming -------------------------------------------------------------------------------- /Synapse/setup/datasets/customer_profile_cosmosdb.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "customer_profile_cosmosdb", 3 | "properties": { 4 | "linkedServiceName": { 5 | "referenceName": "#LINKED_SERVICE_NAME#", 6 | "type": "LinkedServiceReference" 7 | }, 8 | "annotations": [], 9 | "type": "CosmosDbSqlApiCollection", 10 | "schema": { 11 | "type": "object", 12 | "properties": { 13 | "userId": { 14 | "type": "integer" 15 | }, 16 | "cartId": { 17 | "type": "string" 18 | }, 19 | "preferredProducts": { 20 | "type": "array" 21 | }, 22 | "productReviews": { 23 | "type": "array", 24 | "items": { 25 | "type": "object", 26 | "properties": { 27 | "productId": { 28 | "type": "integer" 29 | }, 30 | "reviewText": { 31 | "type": "string" 32 | }, 33 | "reviewDate": { 34 | "type": "string" 35 | } 36 | } 37 | } 38 | } 39 | } 40 | }, 41 | "typeProperties": { 42 | "collectionName": "OnlineUserProfile01" 43 | } 44 | }, 45 | "type": "Microsoft.Synapse/workspaces/datasets" 46 | } -------------------------------------------------------------------------------- /Synapse/setup/datasets/wwi02_date_adls.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "wwi02_date_adls", 3 | "properties": { 4 | "linkedServiceName": { 5 | "referenceName": "#LINKED_SERVICE_NAME#", 6 | "type": "LinkedServiceReference" 7 | }, 8 | "annotations": [], 9 | "type": "DelimitedText", 10 | "typeProperties": { 11 | "location": { 12 | "type": "AzureBlobFSLocation", 13 | "fileName": "generator-date.csv", 14 | "folderPath": "data-generators", 15 | "fileSystem": "wwi-02" 16 | }, 17 | "columnDelimiter": ",", 18 | "escapeChar": "\\", 19 | "quoteChar": "\"" 20 | }, 21 | "schema": [ 22 | { 23 | "type": "String" 24 | }, 25 | { 26 | "type": "String" 27 | }, 28 | { 29 | "type": "String" 30 | }, 31 | { 32 | "type": "String" 33 | }, 34 | { 35 | "type": "String" 36 | } 37 | ] 38 | }, 39 | "type": "Microsoft.Synapse/workspaces/datasets" 40 | } -------------------------------------------------------------------------------- /Synapse/setup/datasets/wwi02_date_asa.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "wwi02_date_asa", 3 | "properties": { 4 | "linkedServiceName": { 5 | "referenceName": "#LINKED_SERVICE_NAME#", 6 | "type": "LinkedServiceReference" 7 | }, 8 | "annotations": [], 9 | "type": "AzureSqlDWTable", 10 | "schema": [ 11 | { 12 | "name": "DateId", 13 | "type": "int", 14 | "precision": 10 15 | }, 16 | { 17 | "name": "Day", 18 | "type": "tinyint", 19 | "precision": 3 20 | }, 21 | { 22 | "name": "Month", 23 | "type": "tinyint", 24 | "precision": 3 25 | }, 26 | { 27 | "name": "Quarter", 28 | "type": "tinyint", 29 | "precision": 3 30 | }, 31 | { 32 | "name": "Year", 33 | "type": "smallint", 34 | "precision": 5 35 | } 36 | ], 37 | "typeProperties": { 38 | "schema": "wwi", 39 | "table": "Date" 40 | } 41 | }, 42 | "type": "Microsoft.Synapse/workspaces/datasets" 43 | } -------------------------------------------------------------------------------- /Synapse/setup/datasets/wwi02_online_user_profiles_01_adal.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "wwi02_online_user_profiles_01_adal", 3 | "properties": { 4 | "linkedServiceName": { 5 | "referenceName": "#LINKED_SERVICE_NAME#", 6 | "type": "LinkedServiceReference" 7 | }, 8 | "annotations": [], 9 | "type": "Json", 10 | "typeProperties": { 11 | "location": { 12 | "type": "AzureBlobStorageLocation", 13 | "fileName": "*.json", 14 | "folderPath": "online-user-profiles-01", 15 | "container": "wwi-02" 16 | } 17 | }, 18 | "schema": { 19 | "type": "object", 20 | "properties": { 21 | "userId": { 22 | "type": "integer" 23 | }, 24 | "cartId": { 25 | "type": "string" 26 | }, 27 | "preferredProducts": { 28 | "type": "array", 29 | "items": { 30 | "type": "integer" 31 | } 32 | }, 33 | "productReviews": { 34 | "type": "array", 35 | "items": { 36 | "type": "object", 37 | "properties": { 38 | "productId": { 39 | "type": "integer" 40 | }, 41 | "reviewText": { 42 | "type": "string" 43 | }, 44 | "reviewDate": { 45 | "type": "string" 46 | } 47 | } 48 | } 49 | } 50 | } 51 | } 52 | }, 53 | "type": "Microsoft.Synapse/workspaces/datasets" 54 | } -------------------------------------------------------------------------------- /Synapse/setup/datasets/wwi02_poc_customer_adls.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "wwi02_poc_customer_adls", 3 | "properties": { 4 | "linkedServiceName": { 5 | "referenceName": "#LINKED_SERVICE_NAME#", 6 | "type": "LinkedServiceReference" 7 | }, 8 | "annotations": [], 9 | "type": "DelimitedText", 10 | "typeProperties": { 11 | "location": { 12 | "type": "AzureBlobFSLocation", 13 | "fileName": "generator-customer.csv", 14 | "folderPath": "data-generators", 15 | "fileSystem": "wwi-02" 16 | }, 17 | "columnDelimiter": ",", 18 | "escapeChar": "\\", 19 | "firstRowAsHeader": true, 20 | "quoteChar": "'" 21 | }, 22 | "schema": [ 23 | { 24 | "name": "CustomerId", 25 | "type": "String" 26 | }, 27 | { 28 | "name": "FirstName", 29 | "type": "String" 30 | }, 31 | { 32 | "name": "MiddleInitial", 33 | "type": "String" 34 | }, 35 | { 36 | "name": "LastName", 37 | "type": "String" 38 | }, 39 | { 40 | "name": "FullName", 41 | "type": "String" 42 | }, 43 | { 44 | "name": "Gender", 45 | "type": "String" 46 | }, 47 | { 48 | "name": "Age", 49 | "type": "String" 50 | }, 51 | { 52 | "name": "BirthDate", 53 | "type": "String" 54 | }, 55 | { 56 | "name": "Address", 57 | "type": "String" 58 | }, 59 | { 60 | "name": "Mobile", 61 | "type": "String" 62 | }, 63 | { 64 | "name": "Email", 65 | "type": "String" 66 | } 67 | ] 68 | } 69 | } -------------------------------------------------------------------------------- /Synapse/setup/datasets/wwi02_poc_customer_asa.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "wwi02_poc_customer_asa", 3 | "properties": { 4 | "linkedServiceName": { 5 | "referenceName": "#LINKED_SERVICE_NAME#", 6 | "type": "LinkedServiceReference" 7 | }, 8 | "annotations": [], 9 | "type": "AzureSqlDWTable", 10 | "schema": [ 11 | { 12 | "name": "CustomerId", 13 | "type": "int", 14 | "precision": 10 15 | }, 16 | { 17 | "name": "FirstName", 18 | "type": "nvarchar" 19 | }, 20 | { 21 | "name": "MiddleInitial", 22 | "type": "nvarchar" 23 | }, 24 | { 25 | "name": "LastName", 26 | "type": "nvarchar" 27 | }, 28 | { 29 | "name": "FullName", 30 | "type": "nvarchar" 31 | }, 32 | { 33 | "name": "Gender", 34 | "type": "nvarchar" 35 | }, 36 | { 37 | "name": "Age", 38 | "type": "int", 39 | "precision": 10 40 | }, 41 | { 42 | "name": "BirthDate", 43 | "type": "date" 44 | }, 45 | { 46 | "name": "Address_PostalCode", 47 | "type": "nvarchar" 48 | }, 49 | { 50 | "name": "Address_Street", 51 | "type": "nvarchar" 52 | }, 53 | { 54 | "name": "Address_City", 55 | "type": "nvarchar" 56 | }, 57 | { 58 | "name": "Address_Country", 59 | "type": "nvarchar" 60 | }, 61 | { 62 | "name": "Mobile", 63 | "type": "nvarchar" 64 | }, 65 | { 66 | "name": "Email", 67 | "type": "nvarchar" 68 | } 69 | ], 70 | "typeProperties": { 71 | "schema": "wwi_poc", 72 | "table": "Customer" 73 | } 74 | } 75 | } -------------------------------------------------------------------------------- /Synapse/setup/datasets/wwi02_product_adls.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "wwi02_product_adls", 3 | "properties": { 4 | "linkedServiceName": { 5 | "referenceName": "#LINKED_SERVICE_NAME#", 6 | "type": "LinkedServiceReference" 7 | }, 8 | "annotations": [], 9 | "type": "DelimitedText", 10 | "typeProperties": { 11 | "location": { 12 | "type": "AzureBlobFSLocation", 13 | "fileName": "generator-product.csv", 14 | "folderPath": "data-generators/generator-product", 15 | "fileSystem": "wwi-02" 16 | }, 17 | "columnDelimiter": ",", 18 | "escapeChar": "\\", 19 | "quoteChar": "\"" 20 | }, 21 | "schema": [ 22 | { 23 | "type": "String" 24 | }, 25 | { 26 | "type": "String" 27 | }, 28 | { 29 | "type": "String" 30 | }, 31 | { 32 | "type": "String" 33 | } 34 | ] 35 | }, 36 | "type": "Microsoft.Synapse/workspaces/datasets" 37 | } -------------------------------------------------------------------------------- /Synapse/setup/datasets/wwi02_product_asa.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "wwi02_product_asa", 3 | "properties": { 4 | "linkedServiceName": { 5 | "referenceName": "#LINKED_SERVICE_NAME#", 6 | "type": "LinkedServiceReference" 7 | }, 8 | "annotations": [], 9 | "type": "AzureSqlDWTable", 10 | "schema": [ 11 | { 12 | "name": "ProductId", 13 | "type": "smallint", 14 | "precision": 5 15 | }, 16 | { 17 | "name": "Seasonality", 18 | "type": "tinyint", 19 | "precision": 3 20 | }, 21 | { 22 | "name": "Price", 23 | "type": "decimal", 24 | "precision": 6, 25 | "scale": 2 26 | }, 27 | { 28 | "name": "Profit", 29 | "type": "decimal", 30 | "precision": 6, 31 | "scale": 2 32 | } 33 | ], 34 | "typeProperties": { 35 | "schema": "wwi", 36 | "table": "Product" 37 | } 38 | }, 39 | "type": "Microsoft.Synapse/workspaces/datasets" 40 | } -------------------------------------------------------------------------------- /Synapse/setup/datasets/wwi02_sale_small_adls.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "wwi02_sale_small_adls", 3 | "properties": { 4 | "linkedServiceName": { 5 | "referenceName": "#LINKED_SERVICE_NAME#", 6 | "type": "LinkedServiceReference" 7 | }, 8 | "annotations": [], 9 | "type": "Parquet", 10 | "typeProperties": { 11 | "location": { 12 | "type": "AzureBlobFSLocation", 13 | "folderPath": "sale-small", 14 | "fileSystem": "wwi-02" 15 | }, 16 | "compressionCodec": "snappy" 17 | }, 18 | "schema": [ 19 | { 20 | "name": "TransactionId", 21 | "type": "UTF8" 22 | }, 23 | { 24 | "name": "CustomerId", 25 | "type": "INT32" 26 | }, 27 | { 28 | "name": "ProductId", 29 | "type": "INT_16" 30 | }, 31 | { 32 | "name": "Quantity", 33 | "type": "INT_8" 34 | }, 35 | { 36 | "name": "Price", 37 | "type": "DECIMAL", 38 | "precision": 38, 39 | "scale": 18 40 | }, 41 | { 42 | "name": "TotalAmount", 43 | "type": "DECIMAL", 44 | "precision": 38, 45 | "scale": 18 46 | }, 47 | { 48 | "name": "TransactionDate", 49 | "type": "INT32" 50 | }, 51 | { 52 | "name": "ProfitAmount", 53 | "type": "DECIMAL", 54 | "precision": 38, 55 | "scale": 18 56 | }, 57 | { 58 | "name": "Hour", 59 | "type": "INT_8" 60 | }, 61 | { 62 | "name": "Minute", 63 | "type": "INT_8" 64 | }, 65 | { 66 | "name": "StoreId", 67 | "type": "INT_16" 68 | }, 69 | { 70 | "name": "Year", 71 | "type": "UTF8" 72 | }, 73 | { 74 | "name": "Quarter", 75 | "type": "UTF8" 76 | }, 77 | { 78 | "name": "Month", 79 | "type": "UTF8" 80 | }, 81 | { 82 | "name": "Day", 83 | "type": "UTF8" 84 | } 85 | ] 86 | }, 87 | "type": "Microsoft.Synapse/workspaces/datasets" 88 | } -------------------------------------------------------------------------------- /Synapse/setup/datasets/wwi02_sale_small_asa.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "wwi02_sale_small_asa", 3 | "properties": { 4 | "linkedServiceName": { 5 | "referenceName": "#LINKED_SERVICE_NAME#", 6 | "type": "LinkedServiceReference" 7 | }, 8 | "annotations": [], 9 | "type": "AzureSqlDWTable", 10 | "schema": [ 11 | { 12 | "name": "TransactionId", 13 | "type": "uniqueidentifier" 14 | }, 15 | { 16 | "name": "CustomerId", 17 | "type": "int", 18 | "precision": 10 19 | }, 20 | { 21 | "name": "ProductId", 22 | "type": "smallint", 23 | "precision": 5 24 | }, 25 | { 26 | "name": "Quantity", 27 | "type": "tinyint", 28 | "precision": 3 29 | }, 30 | { 31 | "name": "Price", 32 | "type": "decimal", 33 | "precision": 9, 34 | "scale": 2 35 | }, 36 | { 37 | "name": "TotalAmount", 38 | "type": "decimal", 39 | "precision": 9, 40 | "scale": 2 41 | }, 42 | { 43 | "name": "TransactionDateId", 44 | "type": "int", 45 | "precision": 10 46 | }, 47 | { 48 | "name": "ProfitAmount", 49 | "type": "decimal", 50 | "precision": 9, 51 | "scale": 2 52 | }, 53 | { 54 | "name": "Hour", 55 | "type": "tinyint", 56 | "precision": 3 57 | }, 58 | { 59 | "name": "Minute", 60 | "type": "tinyint", 61 | "precision": 3 62 | }, 63 | { 64 | "name": "StoreId", 65 | "type": "smallint", 66 | "precision": 5 67 | } 68 | ], 69 | "typeProperties": { 70 | "schema": "wwi", 71 | "table": "SaleSmall" 72 | } 73 | }, 74 | "type": "Microsoft.Synapse/workspaces/datasets" 75 | } -------------------------------------------------------------------------------- /Synapse/setup/datasets/wwi02_sale_small_workload_01_asa.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "wwi02_sale_small_workload_01_asa", 3 | "properties": { 4 | "linkedServiceName": { 5 | "referenceName": "#LINKED_SERVICE_NAME#", 6 | "type": "LinkedServiceReference" 7 | }, 8 | "annotations": [], 9 | "type": "AzureSqlDWTable", 10 | "schema": [ 11 | { 12 | "name": "TransactionId", 13 | "type": "uniqueidentifier" 14 | }, 15 | { 16 | "name": "CustomerId", 17 | "type": "int", 18 | "precision": 10 19 | }, 20 | { 21 | "name": "ProductId", 22 | "type": "smallint", 23 | "precision": 5 24 | }, 25 | { 26 | "name": "Quantity", 27 | "type": "smallint", 28 | "precision": 5 29 | }, 30 | { 31 | "name": "Price", 32 | "type": "decimal", 33 | "precision": 9, 34 | "scale": 2 35 | }, 36 | { 37 | "name": "TotalAmount", 38 | "type": "decimal", 39 | "precision": 9, 40 | "scale": 2 41 | }, 42 | { 43 | "name": "TransactionDateId", 44 | "type": "int", 45 | "precision": 10 46 | }, 47 | { 48 | "name": "ProfitAmount", 49 | "type": "decimal", 50 | "precision": 9, 51 | "scale": 2 52 | }, 53 | { 54 | "name": "Hour", 55 | "type": "tinyint", 56 | "precision": 3 57 | }, 58 | { 59 | "name": "Minute", 60 | "type": "tinyint", 61 | "precision": 3 62 | }, 63 | { 64 | "name": "StoreId", 65 | "type": "smallint", 66 | "precision": 5 67 | } 68 | ], 69 | "typeProperties": { 70 | "schema": "wwi", 71 | "table": "SaleSmall" 72 | } 73 | }, 74 | "type": "Microsoft.Synapse/workspaces/datasets" 75 | } -------------------------------------------------------------------------------- /Synapse/setup/datasets/wwi02_sale_small_workload_02_asa.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "wwi02_sale_small_workload_02_asa", 3 | "properties": { 4 | "linkedServiceName": { 5 | "referenceName": "#LINKED_SERVICE_NAME#", 6 | "type": "LinkedServiceReference" 7 | }, 8 | "annotations": [], 9 | "type": "AzureSqlDWTable", 10 | "schema": [ 11 | { 12 | "name": "TransactionId", 13 | "type": "uniqueidentifier" 14 | }, 15 | { 16 | "name": "CustomerId", 17 | "type": "int", 18 | "precision": 10 19 | }, 20 | { 21 | "name": "ProductId", 22 | "type": "smallint", 23 | "precision": 5 24 | }, 25 | { 26 | "name": "Quantity", 27 | "type": "smallint", 28 | "precision": 5 29 | }, 30 | { 31 | "name": "Price", 32 | "type": "decimal", 33 | "precision": 9, 34 | "scale": 2 35 | }, 36 | { 37 | "name": "TotalAmount", 38 | "type": "decimal", 39 | "precision": 9, 40 | "scale": 2 41 | }, 42 | { 43 | "name": "TransactionDateId", 44 | "type": "int", 45 | "precision": 10 46 | }, 47 | { 48 | "name": "ProfitAmount", 49 | "type": "decimal", 50 | "precision": 9, 51 | "scale": 2 52 | }, 53 | { 54 | "name": "Hour", 55 | "type": "tinyint", 56 | "precision": 3 57 | }, 58 | { 59 | "name": "Minute", 60 | "type": "tinyint", 61 | "precision": 3 62 | }, 63 | { 64 | "name": "StoreId", 65 | "type": "smallint", 66 | "precision": 5 67 | } 68 | ], 69 | "typeProperties": { 70 | "schema": "wwi", 71 | "table": "SaleSmall" 72 | } 73 | }, 74 | "type": "Microsoft.Synapse/workspaces/datasets" 75 | } -------------------------------------------------------------------------------- /Synapse/setup/media/asaworkspace-deploy-configure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/setup/media/asaworkspace-deploy-configure.png -------------------------------------------------------------------------------- /Synapse/setup/media/asaworkspace-deploy-progress.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/setup/media/asaworkspace-deploy-progress.png -------------------------------------------------------------------------------- /Synapse/setup/media/asaworkspace-deploy-tag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/setup/media/asaworkspace-deploy-tag.png -------------------------------------------------------------------------------- /Synapse/setup/media/cloudshell-configure-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/setup/media/cloudshell-configure-01.png -------------------------------------------------------------------------------- /Synapse/setup/media/cloudshell-configure-02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/setup/media/cloudshell-configure-02.png -------------------------------------------------------------------------------- /Synapse/setup/media/cloudshell-configure-03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/setup/media/cloudshell-configure-03.png -------------------------------------------------------------------------------- /Synapse/setup/media/cloudshell-configure-04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/setup/media/cloudshell-configure-04.png -------------------------------------------------------------------------------- /Synapse/setup/media/cloudshell-setup-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/setup/media/cloudshell-setup-01.png -------------------------------------------------------------------------------- /Synapse/setup/media/cloudshell-setup-02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/setup/media/cloudshell-setup-02.png -------------------------------------------------------------------------------- /Synapse/setup/media/cloudshell-setup-03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/setup/media/cloudshell-setup-03.png -------------------------------------------------------------------------------- /Synapse/setup/media/cloudshell-setup-04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/setup/media/cloudshell-setup-04.png -------------------------------------------------------------------------------- /Synapse/setup/pipelines/execute_business_analyst_queries.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Lab 08 - Execute Business Analyst Queries", 3 | "properties": { 4 | "activities": [ 5 | { 6 | "name": "Analyst", 7 | "type": "ForEach", 8 | "dependsOn": [], 9 | "userProperties": [], 10 | "typeProperties": { 11 | "items": { 12 | "value": "@range(1,10)", 13 | "type": "Expression" 14 | }, 15 | "activities": [ 16 | { 17 | "name": "Workload 2 for Data Analyst", 18 | "type": "Lookup", 19 | "dependsOn": [], 20 | "policy": { 21 | "timeout": "7.00:00:00", 22 | "retry": 0, 23 | "retryIntervalInSeconds": 30, 24 | "secureOutput": false, 25 | "secureInput": false 26 | }, 27 | "userProperties": [], 28 | "typeProperties": { 29 | "source": { 30 | "type": "SqlDWSource", 31 | "sqlReaderQuery": "select count(X.A) from (\nselect CAST(CustomerId as nvarchar(20)) as A from wwi.SaleSmall) X where A like '%3%'", 32 | "queryTimeout": "02:00:00" 33 | }, 34 | "dataset": { 35 | "referenceName": "wwi02_sale_small_workload_02_asa", 36 | "type": "DatasetReference" 37 | } 38 | } 39 | } 40 | ] 41 | } 42 | } 43 | ], 44 | "annotations": [] 45 | }, 46 | "type": "Microsoft.Synapse/workspaces/pipelines" 47 | } -------------------------------------------------------------------------------- /Synapse/setup/pipelines/execute_data_analyst_and_ceo_queries.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Lab 08 - Execute Data Analyst and CEO Queries", 3 | "properties": { 4 | "activities": [ 5 | { 6 | "name": "CEO", 7 | "type": "ForEach", 8 | "dependsOn": [], 9 | "userProperties": [], 10 | "typeProperties": { 11 | "items": { 12 | "value": "@range(1,20)", 13 | "type": "Expression" 14 | }, 15 | "activities": [ 16 | { 17 | "name": "Workload 1 for CEO", 18 | "type": "Lookup", 19 | "dependsOn": [], 20 | "policy": { 21 | "timeout": "7.00:00:00", 22 | "retry": 0, 23 | "retryIntervalInSeconds": 30, 24 | "secureOutput": false, 25 | "secureInput": false 26 | }, 27 | "userProperties": [], 28 | "typeProperties": { 29 | "source": { 30 | "type": "SqlDWSource", 31 | "sqlReaderQuery": "select count(X.A) from (\nselect CAST(CustomerId as nvarchar(20)) as A from wwi.SaleSmall) X where A like '%3%'", 32 | "queryTimeout": "02:00:00" 33 | }, 34 | "dataset": { 35 | "referenceName": "wwi02_sale_small_workload_01_asa", 36 | "type": "DatasetReference" 37 | } 38 | } 39 | } 40 | ] 41 | } 42 | }, 43 | { 44 | "name": "Analyst", 45 | "type": "ForEach", 46 | "dependsOn": [], 47 | "userProperties": [], 48 | "typeProperties": { 49 | "items": { 50 | "value": "@range(1,20)", 51 | "type": "Expression" 52 | }, 53 | "activities": [ 54 | { 55 | "name": "Workload 2 for Data Analyst", 56 | "type": "Lookup", 57 | "dependsOn": [], 58 | "policy": { 59 | "timeout": "7.00:00:00", 60 | "retry": 0, 61 | "retryIntervalInSeconds": 30, 62 | "secureOutput": false, 63 | "secureInput": false 64 | }, 65 | "userProperties": [], 66 | "typeProperties": { 67 | "source": { 68 | "type": "SqlDWSource", 69 | "sqlReaderQuery": "select count(X.A) from (\nselect CAST(CustomerId as nvarchar(20)) as A from wwi.SaleSmall) X where A like '%3%'", 70 | "queryTimeout": "02:00:00" 71 | }, 72 | "dataset": { 73 | "referenceName": "wwi02_sale_small_workload_02_asa", 74 | "type": "DatasetReference" 75 | } 76 | } 77 | } 78 | ] 79 | } 80 | } 81 | ], 82 | "annotations": [] 83 | }, 84 | "type": "Microsoft.Synapse/workspaces/pipelines" 85 | } -------------------------------------------------------------------------------- /Synapse/setup/pipelines/import_customer_profiles_into_cosmosdb.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Setup - Import User Profile Data into Cosmos DB", 3 | "properties": { 4 | "description": "Copy user profile data from Azure Storage to Azure Cosmos DB.", 5 | "activities": [ 6 | { 7 | "name": "Copy Customer Profile Data to Cosmos DB", 8 | "type": "Copy", 9 | "dependsOn": [], 10 | "policy": { 11 | "timeout": "7.00:00:00", 12 | "retry": 0, 13 | "retryIntervalInSeconds": 30, 14 | "secureOutput": false, 15 | "secureInput": false 16 | }, 17 | "userProperties": [], 18 | "typeProperties": { 19 | "source": { 20 | "type": "JsonSource", 21 | "storeSettings": { 22 | "type": "AzureBlobStorageReadSettings", 23 | "recursive": true, 24 | "wildcardFolderPath": "online-user-profiles-01", 25 | "wildcardFileName": "*.json", 26 | "enablePartitionDiscovery": false 27 | } 28 | }, 29 | "sink": { 30 | "type": "CosmosDbSqlApiSink", 31 | "writeBehavior": "insert" 32 | }, 33 | "enableStaging": false, 34 | "parallelCopies": 100, 35 | "enableSkipIncompatibleRow": true, 36 | "dataIntegrationUnits": 16 37 | }, 38 | "inputs": [ 39 | { 40 | "referenceName": "wwi02_online_user_profiles_01_adal", 41 | "type": "DatasetReference" 42 | } 43 | ], 44 | "outputs": [ 45 | { 46 | "referenceName": "customer_profile_cosmosdb", 47 | "type": "DatasetReference" 48 | } 49 | ] 50 | } 51 | ], 52 | "annotations": [] 53 | }, 54 | "type": "Microsoft.Synapse/workspaces/pipelines" 55 | } -------------------------------------------------------------------------------- /Synapse/setup/pipelines/import_poc_customer_data.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Setup - Copy PoC Customer Data", 3 | "properties": { 4 | "activities": [ 5 | { 6 | "name": "Copy Customer", 7 | "type": "Copy", 8 | "dependsOn": [], 9 | "policy": { 10 | "timeout": "7.00:00:00", 11 | "retry": 0, 12 | "retryIntervalInSeconds": 30, 13 | "secureOutput": false, 14 | "secureInput": false 15 | }, 16 | "userProperties": [], 17 | "typeProperties": { 18 | "source": { 19 | "type": "DelimitedTextSource", 20 | "storeSettings": { 21 | "type": "AzureBlobFSReadSettings", 22 | "recursive": true 23 | }, 24 | "formatSettings": { 25 | "type": "DelimitedTextReadSettings" 26 | } 27 | }, 28 | "sink": { 29 | "type": "SqlDWSink", 30 | "allowPolyBase": true, 31 | "polyBaseSettings": { 32 | "rejectValue": 0, 33 | "rejectType": "value", 34 | "useTypeDefault": true 35 | } 36 | }, 37 | "enableStaging": true, 38 | "stagingSettings": { 39 | "linkedServiceName": { 40 | "referenceName": "#BLOB_STORAGE_LINKED_SERVICE_NAME#", 41 | "type": "LinkedServiceReference" 42 | }, 43 | "path": "staging" 44 | }, 45 | "translator": { 46 | "type": "TabularTranslator", 47 | "mappings": [ 48 | { 49 | "source": { 50 | "name": "CustomerId", 51 | "type": "String" 52 | }, 53 | "sink": { 54 | "name": "CustomerId", 55 | "type": "Int32" 56 | } 57 | }, 58 | { 59 | "source": { 60 | "name": "FirstName", 61 | "type": "String" 62 | }, 63 | "sink": { 64 | "name": "FirstName", 65 | "type": "String" 66 | } 67 | }, 68 | { 69 | "source": { 70 | "name": "MiddleInitial", 71 | "type": "String" 72 | }, 73 | "sink": { 74 | "name": "MiddleInitial", 75 | "type": "String" 76 | } 77 | }, 78 | { 79 | "source": { 80 | "name": "LastName", 81 | "type": "String" 82 | }, 83 | "sink": { 84 | "name": "LastName", 85 | "type": "String" 86 | } 87 | }, 88 | { 89 | "source": { 90 | "name": "FullName", 91 | "type": "String" 92 | }, 93 | "sink": { 94 | "name": "FullName", 95 | "type": "String" 96 | } 97 | } 98 | ] 99 | } 100 | }, 101 | "inputs": [ 102 | { 103 | "referenceName": "wwi02_poc_customer_adls", 104 | "type": "DatasetReference" 105 | } 106 | ], 107 | "outputs": [ 108 | { 109 | "referenceName": "wwi02_poc_customer_asa", 110 | "type": "DatasetReference" 111 | } 112 | ] 113 | } 114 | ], 115 | "annotations": [] 116 | } 117 | } -------------------------------------------------------------------------------- /Synapse/setup/pipelines/load_sql_pool_from_data_lake.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Setup - Load SQL Pool", 3 | "properties": { 4 | "activities": [ 5 | { 6 | "name": "Process years", 7 | "type": "ForEach", 8 | "dependsOn": [], 9 | "userProperties": [], 10 | "typeProperties": { 11 | "items": { 12 | "value": "@pipeline().parameters.Years", 13 | "type": "Expression" 14 | }, 15 | "isSequential": true, 16 | "activities": [ 17 | { 18 | "name": "Copy Sale Small Year", 19 | "type": "Copy", 20 | "dependsOn": [], 21 | "policy": { 22 | "timeout": "7.00:00:00", 23 | "retry": 0, 24 | "retryIntervalInSeconds": 30, 25 | "secureOutput": false, 26 | "secureInput": false 27 | }, 28 | "userProperties": [], 29 | "typeProperties": { 30 | "source": { 31 | "type": "ParquetSource", 32 | "storeSettings": { 33 | "type": "AzureBlobFSReadSettings", 34 | "recursive": true, 35 | "wildcardFolderPath": { 36 | "value": "@concat('sale-small/Year=',item(), '/*')", 37 | "type": "Expression" 38 | }, 39 | "wildcardFileName": "*.parquet", 40 | "enablePartitionDiscovery": false 41 | } 42 | }, 43 | "sink": { 44 | "type": "SqlDWSink", 45 | "allowPolyBase": true, 46 | "polyBaseSettings": { 47 | "rejectValue": 0, 48 | "rejectType": "value", 49 | "useTypeDefault": true 50 | }, 51 | "disableMetricsCollection": false 52 | }, 53 | "enableStaging": true, 54 | "stagingSettings": { 55 | "linkedServiceName": { 56 | "referenceName": "#BLOB_STORAGE_LINKED_SERVICE_NAME#", 57 | "type": "LinkedServiceReference" 58 | }, 59 | "path": "staging" 60 | }, 61 | "translator": { 62 | "type": "TabularTranslator", 63 | "mappings": [ 64 | { 65 | "source": { 66 | "name": "TransactionId", 67 | "type": "String" 68 | }, 69 | "sink": { 70 | "name": "TransactionId", 71 | "type": "Guid" 72 | } 73 | }, 74 | { 75 | "source": { 76 | "name": "CustomerId", 77 | "type": "Int32" 78 | }, 79 | "sink": { 80 | "name": "CustomerId", 81 | "type": "Int32" 82 | } 83 | }, 84 | { 85 | "source": { 86 | "name": "ProductId", 87 | "type": "Int16" 88 | }, 89 | "sink": { 90 | "name": "ProductId", 91 | "type": "Int16" 92 | } 93 | }, 94 | { 95 | "source": { 96 | "name": "Quantity", 97 | "type": "SByte" 98 | }, 99 | "sink": { 100 | "name": "Quantity", 101 | "type": "Byte" 102 | } 103 | }, 104 | { 105 | "source": { 106 | "name": "Price", 107 | "type": "Decimal" 108 | }, 109 | "sink": { 110 | "name": "Price", 111 | "type": "Decimal" 112 | } 113 | }, 114 | { 115 | "source": { 116 | "name": "TotalAmount", 117 | "type": "Decimal" 118 | }, 119 | "sink": { 120 | "name": "TotalAmount", 121 | "type": "Decimal" 122 | } 123 | }, 124 | { 125 | "source": { 126 | "name": "TransactionDate", 127 | "type": "Int32" 128 | }, 129 | "sink": { 130 | "name": "TransactionDateId", 131 | "type": "Int32" 132 | } 133 | }, 134 | { 135 | "source": { 136 | "name": "ProfitAmount", 137 | "type": "Decimal" 138 | }, 139 | "sink": { 140 | "name": "ProfitAmount", 141 | "type": "Decimal" 142 | } 143 | }, 144 | { 145 | "source": { 146 | "name": "Hour", 147 | "type": "SByte" 148 | }, 149 | "sink": { 150 | "name": "Hour", 151 | "type": "Byte" 152 | } 153 | }, 154 | { 155 | "source": { 156 | "name": "Minute", 157 | "type": "SByte" 158 | }, 159 | "sink": { 160 | "name": "Minute", 161 | "type": "Byte" 162 | } 163 | }, 164 | { 165 | "source": { 166 | "name": "StoreId", 167 | "type": "Int16" 168 | }, 169 | "sink": { 170 | "name": "StoreId", 171 | "type": "Int16" 172 | } 173 | } 174 | ] 175 | } 176 | }, 177 | "inputs": [ 178 | { 179 | "referenceName": "wwi02_sale_small_adls", 180 | "type": "DatasetReference" 181 | } 182 | ], 183 | "outputs": [ 184 | { 185 | "referenceName": "wwi02_sale_small_asa", 186 | "type": "DatasetReference" 187 | } 188 | ] 189 | } 190 | ] 191 | } 192 | }, 193 | { 194 | "name": "Copy Product", 195 | "type": "Copy", 196 | "dependsOn": [], 197 | "policy": { 198 | "timeout": "7.00:00:00", 199 | "retry": 0, 200 | "retryIntervalInSeconds": 30, 201 | "secureOutput": false, 202 | "secureInput": false 203 | }, 204 | "userProperties": [], 205 | "typeProperties": { 206 | "source": { 207 | "type": "DelimitedTextSource", 208 | "storeSettings": { 209 | "type": "AzureBlobFSReadSettings", 210 | "recursive": true 211 | }, 212 | "formatSettings": { 213 | "type": "DelimitedTextReadSettings" 214 | } 215 | }, 216 | "sink": { 217 | "type": "SqlDWSink", 218 | "preCopyScript": "truncate table wwi.Product", 219 | "allowPolyBase": true, 220 | "polyBaseSettings": { 221 | "rejectValue": 0, 222 | "rejectType": "value", 223 | "useTypeDefault": true 224 | }, 225 | "disableMetricsCollection": false 226 | }, 227 | "enableStaging": true, 228 | "stagingSettings": { 229 | "linkedServiceName": { 230 | "referenceName": "#BLOB_STORAGE_LINKED_SERVICE_NAME#", 231 | "type": "LinkedServiceReference" 232 | }, 233 | "path": "staging" 234 | }, 235 | "translator": { 236 | "type": "TabularTranslator", 237 | "mappings": [ 238 | { 239 | "source": { 240 | "type": "String", 241 | "ordinal": 1 242 | }, 243 | "sink": { 244 | "name": "ProductId", 245 | "type": "Int16" 246 | } 247 | }, 248 | { 249 | "source": { 250 | "type": "String", 251 | "ordinal": 2 252 | }, 253 | "sink": { 254 | "name": "Seasonality", 255 | "type": "Byte" 256 | } 257 | }, 258 | { 259 | "source": { 260 | "type": "String", 261 | "ordinal": 3 262 | }, 263 | "sink": { 264 | "name": "Price", 265 | "type": "Decimal" 266 | } 267 | }, 268 | { 269 | "source": { 270 | "type": "String", 271 | "ordinal": 4 272 | }, 273 | "sink": { 274 | "name": "Profit", 275 | "type": "Decimal" 276 | } 277 | } 278 | ] 279 | } 280 | }, 281 | "inputs": [ 282 | { 283 | "referenceName": "wwi02_product_adls", 284 | "type": "DatasetReference" 285 | } 286 | ], 287 | "outputs": [ 288 | { 289 | "referenceName": "wwi02_product_asa", 290 | "type": "DatasetReference" 291 | } 292 | ] 293 | }, 294 | { 295 | "name": "Copy Date", 296 | "type": "Copy", 297 | "dependsOn": [], 298 | "policy": { 299 | "timeout": "7.00:00:00", 300 | "retry": 0, 301 | "retryIntervalInSeconds": 30, 302 | "secureOutput": false, 303 | "secureInput": false 304 | }, 305 | "userProperties": [], 306 | "typeProperties": { 307 | "source": { 308 | "type": "DelimitedTextSource", 309 | "storeSettings": { 310 | "type": "AzureBlobFSReadSettings", 311 | "recursive": true 312 | }, 313 | "formatSettings": { 314 | "type": "DelimitedTextReadSettings" 315 | } 316 | }, 317 | "sink": { 318 | "type": "SqlDWSink", 319 | "preCopyScript": "truncate table wwi.Date", 320 | "allowPolyBase": true, 321 | "polyBaseSettings": { 322 | "rejectValue": 0, 323 | "rejectType": "value", 324 | "useTypeDefault": true 325 | }, 326 | "disableMetricsCollection": false 327 | }, 328 | "enableStaging": true, 329 | "stagingSettings": { 330 | "linkedServiceName": { 331 | "referenceName": "#BLOB_STORAGE_LINKED_SERVICE_NAME#", 332 | "type": "LinkedServiceReference" 333 | }, 334 | "path": "staging" 335 | }, 336 | "translator": { 337 | "type": "TabularTranslator", 338 | "mappings": [ 339 | { 340 | "source": { 341 | "type": "String", 342 | "ordinal": 1 343 | }, 344 | "sink": { 345 | "name": "DateId", 346 | "type": "Int32" 347 | } 348 | }, 349 | { 350 | "source": { 351 | "type": "String", 352 | "ordinal": 2 353 | }, 354 | "sink": { 355 | "name": "Day", 356 | "type": "Byte" 357 | } 358 | }, 359 | { 360 | "source": { 361 | "type": "String", 362 | "ordinal": 3 363 | }, 364 | "sink": { 365 | "name": "Month", 366 | "type": "Byte" 367 | } 368 | }, 369 | { 370 | "source": { 371 | "type": "String", 372 | "ordinal": 4 373 | }, 374 | "sink": { 375 | "name": "Quarter", 376 | "type": "Byte" 377 | } 378 | }, 379 | { 380 | "source": { 381 | "type": "String", 382 | "ordinal": 5 383 | }, 384 | "sink": { 385 | "name": "Year", 386 | "type": "Int16" 387 | } 388 | } 389 | ] 390 | } 391 | }, 392 | "inputs": [ 393 | { 394 | "referenceName": "wwi02_date_adls", 395 | "type": "DatasetReference" 396 | } 397 | ], 398 | "outputs": [ 399 | { 400 | "referenceName": "wwi02_date_asa", 401 | "type": "DatasetReference" 402 | } 403 | ] 404 | } 405 | ], 406 | "parameters": { 407 | "Years": { 408 | "type": "array", 409 | "defaultValue": [ 410 | 2019 411 | ] 412 | } 413 | }, 414 | "annotations": [] 415 | }, 416 | "type": "Microsoft.Synapse/workspaces/pipelines" 417 | } -------------------------------------------------------------------------------- /Synapse/setup/readme.md: -------------------------------------------------------------------------------- 1 | # Deploy and configure your Azure Synapse Analytics workspace 2 | 3 | >**IMPORTANT** 4 | > 5 | >This setup procedure needs to be executed only once for the Azure Synapse Analytics labs in this repo. If you have already excuted the deployment and configuration procedure for a previous lab, you do not need to run it again. 6 | 7 | ## Pre-requisites for deployment 8 | 9 | The following requirements must be met before the deployment: 10 | 11 | - A resource group (this will be provided during the deployment process). 12 | 13 | >**IMPORTANT** 14 | > 15 | >In case you didn't create the resource group yourself, make sure your account has the `Owner` role on the resource group. 16 | > 17 | >Also, your account (i.e. the Azure AD account used to deploy the Azure Synapse Analytics workspace) must have permissions to create new resource groups in the subscription (this is required because Synapse Analytics requires an additional resource group to keep various hidden artifacts; this resource group is created during the deployment process). 18 | 19 | - A unique suffix to be used when generating the name of the workspace. All workspaces deployed using the templates in this repo are named `asagaworkspace`, where `` gets replaced with the value you provide. Make sure the unique suffix is specific enough to avoid potential naming collisions (i.e. avoid using common values like `01`, `1`, `test`, etc.). Make sure you remember the unique suffix as you need to use it for additional configuration once the Azure Synapse Analytics workspace deployment is complete. 20 | - A password for the SQL admin account of the workspace. Make sure you save the password in a secure location (like a password manager) as you will need to use it later. 21 | - A GitHub account to access the labs repository. 22 | 23 | ## Configure the Azure Cloud Shell 24 | 25 | >**NOTE** 26 | > 27 | >If Cloud Shell is already configured, you can skip this section entirely and advance to [Deploy the Synapse Analytics workspace](#deploy-the-synapse-analytics-workspace). 28 | 29 | In the Azure Portal, navigate to your resource group and create a new storage account to be used in the Cloud Shell configuration process (make sure the resource type you create is `Storage account`). In the newly created storage account, select `File shares` (under the `File service` settings group) and create a new file share. 30 | 31 | Next, select the Cloud Shell icon (located in the top right part of the page) and then select `PowerShell`: 32 | 33 | ![Cloud Shell configuration start](./media/cloudshell-configure-01.png) 34 | 35 | Select your subscription under `Subscription` if it's not already selected, and then select `Show advanced settings`: 36 | 37 | ![Cloud Shell configuration advanced settings](./media/cloudshell-configure-02.png) 38 | 39 | Provide values for the following fields: 40 | 41 | - **Cloud Shell region**: the same region as the region of your resource group. 42 | - **Resource group**: select `Use existing` and then select you resource group from the list. 43 | - **Storage account**: select `Use existing` and then select the storage account you created above. 44 | - **File share**: select `Use existing` and then select the file share you created above. 45 | 46 | Select `Attach storage` once all the values are in place. 47 | 48 | ![Cloud Shell configuration advanced settings values](./media/cloudshell-configure-03.png) 49 | 50 | Once configuration is complete, you should get an instance of Cloud Shell: 51 | 52 | ![Cloud Shell](./media/cloudshell-configure-04.png) 53 | 54 | ## Deploy the Synapse Analytics workspace 55 | 56 | Click the `Deploy to Azure` button below to start the deployment process. 57 | 58 | [![Deploy to Azure](https://aka.ms/deploytoazurebutton)](https://portal.azure.com/#create/Microsoft.Template/uri/https%3A%2F%2Fraw.githubusercontent.com%2Fmicrosoft%2FCSU-AzureLabs%2Fmain%2FSynapse%2Fsetup%2Fautomation%2F00-asa-workspace-core.json) 59 | 60 | You should see next the `Custom deployment` screen where you need to provide the following (see [Pre-requisites for deployment](#pre-requisites-for-deployment) above for details): 61 | 62 | - The resource group where the Synapse Analytics workspace will be deployed. 63 | - The unique suffix used to generate the name of the workspace (**NOTE**: Make sure this value has a **maximum** length of **9 characters**). 64 | - The password for the SQL Administrator account. 65 | 66 | Select `Review + create` to validate the settings. 67 | 68 | ![Synapse Analytics workspace deployment configuration](./media/asaworkspace-deploy-configure.png) 69 | 70 | Once the validation is passed, select `Create` to start the deployment. You should see next an indication of the deployment progress: 71 | 72 | ![Synapse Analytics workspace deployment progress](./media/asaworkspace-deploy-progress.png) 73 | 74 | Wait until the deployment completes successfully before proceeding to the next step. 75 | 76 | ## Tag your resource group with the unique suffix 77 | 78 | In the Azure Portal, navigate to the resource group you used to deploy the Synapse Analytics workspace (see [Pre-requisites for deployment](#pre-requisites-for-deployment) above for details). 79 | 80 | Select the `Tags` section and add check if a tag named `DeploymentId` was added to the resource group. If it's already there, make sure its values matches your unique suffix. If not, add a new tag named `DeploymentId` (use the unique suffix as the value of the tag), and then select `Apply` to save it. 81 | 82 | ![Synapse Analytics workspace resource group tagging](./media/asaworkspace-deploy-tag.png) 83 | 84 | The deployment of your Synapse Analytics workspace is now complete. Next, you will deploy the artifacts required by the labs into the newly created Synapse Analytics workspace. 85 | 86 | ## Run the global setup script in Cloud Shell 87 | 88 | In the Azure Portal, navigate to the resource group you used to deploy the Synapse Analytics workspace (see [Pre-requisites for deployment](#pre-requisites-for-deployment) above for details) and start a Cloud Shell instance (see [Configure the Azure Cloud Shell](#configure-the-azure-cloud-shell) above for details). 89 | 90 | Once the Cloud Shell instance becomes available, run ```az login``` to make sure the correct account and subscription context are set: 91 | 92 | ![Cloud Shell login](./media/cloudshell-setup-01.png) 93 | 94 | Clone the content packs repository into the `asa` local folder using 95 | 96 | ```cmd 97 | git clone https://github.com/microsoft/CSU-AzureLabs asa 98 | ``` 99 | 100 | If GIT asks for credentials, provide your GitHub username and password. 101 | 102 | >**IMPORTANT** 103 | > 104 | >If your GitHub account has two-factor authentication activated, you need to provide a PAT (Personal Access Token) instead your password. For more details, read the [Creating a personal access token](https://docs.github.com/en/free-pro-team@latest/github/authenticating-to-github/creating-a-personal-access-token) section in GitHub Docs. 105 | > 106 | >When pasting your password or PAT into the Cloud Shell window, make sure you are familiar with the supported key combinations (Shift-INS for Windows and Cmd-V for Mac). For more details, see [Using the Azure Cloud Shell window](https://docs.microsoft.com/en-us/azure/cloud-shell/using-the-shell-window#copy-and-paste). 107 | 108 | Once the repository is successfully cloned, you shoud see a result similar to this: 109 | 110 | ![Cloud Shell git clone repository](./media/cloudshell-setup-02.png) 111 | 112 | Change your current directory using 113 | 114 | ```cmd 115 | cd asa/Synapse/setup/automation 116 | ``` 117 | 118 | and then start the setup script using 119 | 120 | ```powershell 121 | .\01-environment-setup.ps1 122 | ``` 123 | 124 | Make sure the selected subscription is the one that contains the resource group where you deployed the Synapse Analytics workspace: 125 | 126 | ![Cloud Shell select subscription](./media/cloudshell-setup-03.png) 127 | 128 | Enter the name of the resource group where you deployed the Synapse Analytics workspace: 129 | 130 | ![Cloud Shell select resource group](./media/cloudshell-setup-04.png) 131 | 132 | The setup script will now proceed to create all necessary Synapse Analytics artifacts in your environment. 133 | 134 | The process should take 5 to 10 minutes to finish. Wait until the setup script is finished before proceeding to the next steps. 135 | -------------------------------------------------------------------------------- /Synapse/setup/sql/01-create-logins.sql: -------------------------------------------------------------------------------- 1 | create login [asa.sql.workload01] with password = '#PASSWORD#' 2 | create login [asa.sql.workload02] with password = '#PASSWORD#' 3 | create login [asa.sql.import01] with password = '#PASSWORD#' 4 | create login [asa.sql.import02] with password = '#PASSWORD#' 5 | create login [asa.sql.highperf] with password = '#PASSWORD#' -------------------------------------------------------------------------------- /Synapse/setup/sql/02-create-users.sql: -------------------------------------------------------------------------------- 1 | create user [asa.sql.workload01] for login [asa.sql.workload01] 2 | create user [asa.sql.workload02] for login [asa.sql.workload02] 3 | execute sp_addrolemember 'db_datareader', 'asa.sql.workload01' 4 | execute sp_addrolemember 'db_datareader', 'asa.sql.workload02' 5 | 6 | 7 | create user [asa.sql.import01] for login [asa.sql.import01] 8 | create user [asa.sql.import02] for login [asa.sql.import02] 9 | execute sp_addrolemember 'db_owner', 'asa.sql.import01' 10 | execute sp_addrolemember 'db_owner', 'asa.sql.import02' 11 | 12 | 13 | create user [asa.sql.highperf] for login [asa.sql.highperf] 14 | execute sp_addrolemember 'db_owner', 'asa.sql.highperf' 15 | execute sp_addrolemember 'staticrc80', 'asa.sql.highperf' 16 | 17 | --CREATE USER [#USER_NAME#] FROM EXTERNAL PROVIDER; 18 | --EXEC sp_addrolemember 'db_owner', '#USER_NAME#' -------------------------------------------------------------------------------- /Synapse/setup/sql/03-create-schemas.sql: -------------------------------------------------------------------------------- 1 | create schema wwi 2 | go 3 | create schema wwi_security 4 | go 5 | create schema wwi_staging 6 | go 7 | create schema wwi_perf 8 | go 9 | create schema wwi_poc 10 | go 11 | create schema wwi_ml 12 | go -------------------------------------------------------------------------------- /Synapse/setup/sql/04-create-tables-in-wwi-schema.sql: -------------------------------------------------------------------------------- 1 | IF OBJECT_ID(N'[wwi].[Product]', N'U') IS NOT NULL 2 | DROP TABLE [wwi].[Product] 3 | 4 | CREATE TABLE [wwi].[Product] 5 | ( 6 | ProductId SMALLINT NOT NULL, 7 | Seasonality TINYINT NOT NULL, 8 | Price DECIMAL(6,2), 9 | Profit DECIMAL(6,2) 10 | ) 11 | WITH 12 | ( 13 | DISTRIBUTION = REPLICATE 14 | ) 15 | 16 | IF OBJECT_ID(N'[wwi].[Date]', N'U') IS NOT NULL 17 | DROP TABLE [wwi].[Date] 18 | 19 | CREATE TABLE [wwi].[Date] 20 | ( 21 | DateId int not null, 22 | Day tinyint not null, 23 | Month tinyint not null, 24 | Quarter tinyint not null, 25 | Year smallint not null 26 | ) 27 | WITH 28 | ( 29 | DISTRIBUTION = REPLICATE 30 | ) 31 | 32 | IF OBJECT_ID(N'[wwi].[SaleSmall]', N'U') IS NOT NULL 33 | DROP TABLE [wwi].[SaleSmall] 34 | 35 | CREATE TABLE [wwi].[SaleSmall] 36 | ( 37 | [TransactionId] [uniqueidentifier] NOT NULL, 38 | [CustomerId] [int] NOT NULL, 39 | [ProductId] [smallint] NOT NULL, 40 | [Quantity] [tinyint] NOT NULL, 41 | [Price] [decimal](9,2) NOT NULL, 42 | [TotalAmount] [decimal](9,2) NOT NULL, 43 | [TransactionDateId] [int] NOT NULL, 44 | [ProfitAmount] [decimal](9,2) NOT NULL, 45 | [Hour] [tinyint] NOT NULL, 46 | [Minute] [tinyint] NOT NULL, 47 | [StoreId] [smallint] NOT NULL 48 | ) 49 | WITH 50 | ( 51 | DISTRIBUTION = HASH ( [CustomerId] ), 52 | CLUSTERED COLUMNSTORE INDEX, 53 | PARTITION 54 | ( 55 | [TransactionDateId] RANGE RIGHT FOR VALUES ( 56 | 20100101, 20100201, 20100301, 20100401, 20100501, 20100601, 20100701, 20100801, 20100901, 20101001, 20101101, 20101201, 57 | 20110101, 20110201, 20110301, 20110401, 20110501, 20110601, 20110701, 20110801, 20110901, 20111001, 20111101, 20111201, 58 | 20120101, 20120201, 20120301, 20120401, 20120501, 20120601, 20120701, 20120801, 20120901, 20121001, 20121101, 20121201, 59 | 20130101, 20130201, 20130301, 20130401, 20130501, 20130601, 20130701, 20130801, 20130901, 20131001, 20131101, 20131201, 60 | 20140101, 20140201, 20140301, 20140401, 20140501, 20140601, 20140701, 20140801, 20140901, 20141001, 20141101, 20141201, 61 | 20150101, 20150201, 20150301, 20150401, 20150501, 20150601, 20150701, 20150801, 20150901, 20151001, 20151101, 20151201, 62 | 20160101, 20160201, 20160301, 20160401, 20160501, 20160601, 20160701, 20160801, 20160901, 20161001, 20161101, 20161201, 63 | 20170101, 20170201, 20170301, 20170401, 20170501, 20170601, 20170701, 20170801, 20170901, 20171001, 20171101, 20171201, 64 | 20180101, 20180201, 20180301, 20180401, 20180501, 20180601, 20180701, 20180801, 20180901, 20181001, 20181101, 20181201, 65 | 20190101, 20190201, 20190301, 20190401, 20190501, 20190601, 20190701, 20190801, 20190901, 20191001, 20191101, 20191201) 66 | ) 67 | ) -------------------------------------------------------------------------------- /Synapse/setup/sql/05-create-tables-in-wwi-ml-schema-delta.sql: -------------------------------------------------------------------------------- 1 | CREATE EXTERNAL TABLE [wwi_ml].[MLModelExt] 2 | ( 3 | [Model] [varbinary](max) NULL 4 | ) 5 | WITH 6 | ( 7 | LOCATION='/ml/onnx-hex' , 8 | DATA_SOURCE = ModelStorage , 9 | FILE_FORMAT = csv , 10 | REJECT_TYPE = VALUE , 11 | REJECT_VALUE = 0 12 | ) 13 | GO -------------------------------------------------------------------------------- /Synapse/setup/sql/05-create-tables-in-wwi-ml-schema.sql: -------------------------------------------------------------------------------- 1 | CREATE MASTER KEY 2 | GO 3 | 4 | CREATE DATABASE SCOPED CREDENTIAL StorageCredential 5 | WITH 6 | IDENTITY = 'SHARED ACCESS SIGNATURE' 7 | , SECRET = '#DATA_LAKE_ACCOUNT_KEY#' 8 | ; 9 | 10 | --DROP EXTERNAL DATA SOURCE ModelStorage 11 | -- Create an external data source with CREDENTIAL option. 12 | CREATE EXTERNAL DATA SOURCE ModelStorage 13 | WITH 14 | ( LOCATION = 'wasbs://wwi-02@#DATA_LAKE_ACCOUNT_NAME#.blob.core.windows.net' 15 | , CREDENTIAL = StorageCredential 16 | , TYPE = HADOOP 17 | ) 18 | ; 19 | CREATE EXTERNAL FILE FORMAT csv 20 | WITH ( 21 | FORMAT_TYPE = DELIMITEDTEXT, 22 | FORMAT_OPTIONS ( 23 | FIELD_TERMINATOR = ',', 24 | STRING_DELIMITER = '', 25 | DATE_FORMAT = '', 26 | USE_TYPE_DEFAULT = False 27 | ) 28 | ); 29 | 30 | 31 | CREATE EXTERNAL TABLE [wwi_ml].[MLModelExt] 32 | ( 33 | [Model] [varbinary](max) NULL 34 | ) 35 | WITH 36 | ( 37 | LOCATION='/ml/onnx-hex' , 38 | DATA_SOURCE = ModelStorage , 39 | FILE_FORMAT = csv , 40 | REJECT_TYPE = VALUE , 41 | REJECT_VALUE = 0 42 | ) 43 | GO 44 | 45 | CREATE TABLE [wwi_ml].[MLModel] 46 | ( 47 | [Id] [int] IDENTITY(1,1) NOT NULL, 48 | [Model] [varbinary](max) NULL, 49 | [Description] [varchar](200) NULL 50 | ) 51 | WITH 52 | ( 53 | DISTRIBUTION = REPLICATE, 54 | HEAP 55 | ) 56 | GO 57 | -------------------------------------------------------------------------------- /Synapse/setup/sql/06-create-tables-in-wwi-security-schema.sql: -------------------------------------------------------------------------------- 1 | 2 | create user [CEO] without login 3 | create user [DataAnalystMiami] without login 4 | create user [DataAnalystSanDiego] without login 5 | 6 | IF OBJECT_ID(N'[wwi_security].[CustomerInfo]', N'U') IS NOT NULL 7 | DROP TABLE [wwi_security].[CustomerInfo] 8 | 9 | CREATE TABLE [wwi_security].[CustomerInfo] 10 | ( 11 | [UserName] [nvarchar](100) NULL, 12 | [Gender] [nvarchar](10) NULL, 13 | [Phone] [nvarchar](50) NULL, 14 | [Email] [nvarchar](150) NULL, 15 | [CreditCard] [nvarchar](21) NULL 16 | ) 17 | WITH 18 | ( 19 | DISTRIBUTION = REPLICATE, 20 | CLUSTERED COLUMNSTORE INDEX 21 | ) 22 | GO 23 | 24 | COPY INTO [wwi_security].[CustomerInfo] 25 | FROM 'https://#DATA_LAKE_ACCOUNT_NAME#.dfs.core.windows.net/wwi-02/security/customerinfo.csv' 26 | WITH ( 27 | CREDENTIAL = (IDENTITY = 'Storage Account Key', SECRET = '#DATA_LAKE_ACCOUNT_KEY#'), 28 | FILE_TYPE = 'CSV', 29 | FIRSTROW = 2, 30 | FIELDQUOTE='''', 31 | ENCODING='UTF8' 32 | ) 33 | GO 34 | 35 | IF OBJECT_ID(N'[wwi_security].[Sale]', N'U') IS NOT NULL 36 | DROP TABLE [wwi_security].[Sale] 37 | GO 38 | 39 | CREATE TABLE [wwi_security].[Sale] 40 | ( 41 | [ProductId] [int] NOT NULL, 42 | [Analyst] [nvarchar](100) NOT NULL, 43 | [Product] [nvarchar](200) NOT NULL, 44 | [CampaignName] [nvarchar](200) NOT NULL, 45 | [Quantity] [int] NOT NULL, 46 | [Region] [nvarchar](50) NOT NULL, 47 | [State] [nvarchar](50) NOT NULL, 48 | [City] [nvarchar](50) NOT NULL, 49 | [Revenue] [nvarchar](50) NULL, 50 | [RevenueTarget] [nvarchar](50) NULL 51 | ) 52 | WITH 53 | ( 54 | DISTRIBUTION = ROUND_ROBIN, 55 | CLUSTERED COLUMNSTORE INDEX 56 | ) 57 | GO 58 | 59 | COPY INTO [wwi_security].[Sale] 60 | FROM 'https://#DATA_LAKE_ACCOUNT_NAME#.dfs.core.windows.net/wwi-02/security/factsale.csv' 61 | WITH ( 62 | CREDENTIAL = (IDENTITY = 'Storage Account Key', SECRET = '#DATA_LAKE_ACCOUNT_KEY#'), 63 | FILE_TYPE = 'CSV', 64 | FIRSTROW = 2, 65 | FIELDQUOTE='''', 66 | ENCODING='UTF8' 67 | ) 68 | GO 69 | -------------------------------------------------------------------------------- /Synapse/setup/sql/07-create-wwi-perf-sale-heap.sql: -------------------------------------------------------------------------------- 1 | EXECUTE AS USER = 'asa.sql.highperf' 2 | 3 | IF OBJECT_ID(N'[wwi_perf].[Sale_Heap]', N'U') IS NOT NULL 4 | DROP TABLE [wwi_perf].[Sale_Heap] 5 | 6 | CREATE TABLE [wwi_perf].[Sale_Heap] 7 | WITH 8 | ( 9 | DISTRIBUTION = ROUND_ROBIN, 10 | HEAP 11 | ) 12 | AS 13 | SELECT 14 | * 15 | FROM 16 | [wwi].[SaleSmall] 17 | WHERE 18 | TransactionDateId >= 20190101 19 | OPTION (LABEL = 'CTAS : Sale_Heap') -------------------------------------------------------------------------------- /Synapse/setup/sql/08-create-wwi-perf-sale-partition01.sql: -------------------------------------------------------------------------------- 1 | EXECUTE AS USER = 'asa.sql.highperf' 2 | 3 | 4 | IF OBJECT_ID(N'[wwi_perf].[Sale_Partition01]', N'U') IS NOT NULL 5 | DROP TABLE [wwi_perf].[Sale_Partition01] 6 | 7 | CREATE TABLE [wwi_perf].[Sale_Partition01] 8 | WITH 9 | ( 10 | DISTRIBUTION = HASH ( [CustomerId] ), 11 | CLUSTERED COLUMNSTORE INDEX, 12 | PARTITION 13 | ( 14 | [TransactionDateId] RANGE RIGHT FOR VALUES ( 15 | --20100101, 20100201, 20100301, 20100401, 20100501, 20100601, 20100701, 20100801, 20100901, 20101001, 20101101, 20101201, 16 | --20110101, 20110201, 20110301, 20110401, 20110501, 20110601, 20110701, 20110801, 20110901, 20111001, 20111101, 20111201, 17 | --20120101, 20120201, 20120301, 20120401, 20120501, 20120601, 20120701, 20120801, 20120901, 20121001, 20121101, 20121201, 18 | --20130101, 20130201, 20130301, 20130401, 20130501, 20130601, 20130701, 20130801, 20130901, 20131001, 20131101, 20131201, 19 | --20140101, 20140201, 20140301, 20140401, 20140501, 20140601, 20140701, 20140801, 20140901, 20141001, 20141101, 20141201, 20 | --20150101, 20150201, 20150301, 20150401, 20150501, 20150601, 20150701, 20150801, 20150901, 20151001, 20151101, 20151201, 21 | --20160101, 20160201, 20160301, 20160401, 20160501, 20160601, 20160701, 20160801, 20160901, 20161001, 20161101, 20161201, 22 | --20170101, 20170201, 20170301, 20170401, 20170501, 20170601, 20170701, 20170801, 20170901, 20171001, 20171101, 20171201, 23 | --20180101, 20180201, 20180301, 20180401, 20180501, 20180601, 20180701, 20180801, 20180901, 20181001, 20181101, 20181201, 24 | 20190101, 20190201, 20190301, 20190401, 20190501, 20190601, 20190701, 20190801, 20190901, 20191001, 20191101, 20191201) 25 | ) 26 | ) 27 | AS 28 | SELECT 29 | * 30 | FROM 31 | [wwi_perf].[Sale_Heap] 32 | OPTION (LABEL = 'CTAS : Sale_Partition01') -------------------------------------------------------------------------------- /Synapse/setup/sql/09-create-wwi-perf-sale-partition02.sql: -------------------------------------------------------------------------------- 1 | EXECUTE AS USER = 'asa.sql.highperf' 2 | 3 | IF OBJECT_ID(N'[wwi_perf].[Sale_Partition02]', N'U') IS NOT NULL 4 | DROP TABLE [wwi_perf].[Sale_Partition02] 5 | 6 | CREATE TABLE [wwi_perf].[Sale_Partition02] 7 | WITH 8 | ( 9 | DISTRIBUTION = HASH ( [CustomerId] ), 10 | CLUSTERED COLUMNSTORE INDEX, 11 | PARTITION 12 | ( 13 | [TransactionDateId] RANGE RIGHT FOR VALUES ( 14 | --20100101, 20100401, 20100701, 20101001, 15 | --20110101, 20110401, 20110701, 20111001, 16 | --20120101, 20120401, 20120701, 20121001, 17 | --20130101, 20130401, 20130701, 20131001, 18 | --20140101, 20140401, 20140701, 20141001, 19 | --20150101, 20150401, 20150701, 20151001, 20 | --20160101, 20160401, 20160701, 20161001, 21 | --20170101, 20170401, 20170701, 20171001, 22 | --20180101, 20180401, 20180701, 20181001, 23 | 20190101, 20190401, 20190701, 20191001) 24 | ) 25 | ) 26 | AS 27 | SELECT 28 | * 29 | FROM 30 | [wwi_perf].[Sale_Heap] 31 | OPTION (LABEL = 'CTAS : Sale_Partition02') -------------------------------------------------------------------------------- /Synapse/setup/sql/10-create-wwi-perf-sale-index.sql: -------------------------------------------------------------------------------- 1 | EXECUTE AS USER = 'asa.sql.highperf' 2 | 3 | IF OBJECT_ID(N'[wwi_perf].[Sale_Index]', N'U') IS NOT NULL 4 | DROP TABLE [wwi_perf].[Sale_Index] 5 | 6 | CREATE TABLE [wwi_perf].[Sale_Index] 7 | WITH 8 | ( 9 | DISTRIBUTION = HASH ( [CustomerId] ), 10 | CLUSTERED INDEX (CustomerId) 11 | ) 12 | AS 13 | SELECT 14 | * 15 | FROM 16 | [wwi_perf].[Sale_Heap] 17 | OPTION (LABEL = 'CTAS : Sale_Index') 18 | -------------------------------------------------------------------------------- /Synapse/setup/sql/11-create-wwi-perf-sale-hash-ordered.sql: -------------------------------------------------------------------------------- 1 | EXECUTE AS USER = 'asa.sql.highperf' 2 | 3 | IF OBJECT_ID(N'[wwi_perf].[Sale_Hash_Ordered]', N'U') IS NOT NULL 4 | DROP TABLE [wwi_perf].[Sale_Hash_Ordered] 5 | 6 | CREATE TABLE [wwi_perf].[Sale_Hash_Ordered] 7 | WITH 8 | ( 9 | DISTRIBUTION = HASH ( [CustomerId] ), 10 | CLUSTERED COLUMNSTORE INDEX ORDER( [CustomerId] ) 11 | ) 12 | AS 13 | SELECT 14 | * 15 | FROM 16 | [wwi_perf].[Sale_Heap] 17 | OPTION (LABEL = 'CTAS : Sale_Hash_Ordered', MAXDOP 1) -------------------------------------------------------------------------------- /Synapse/setup/sql/12-create-wwi-perf-sale-hash-projection.sql: -------------------------------------------------------------------------------- 1 | EXECUTE AS USER = 'asa.sql.highperf' 2 | 3 | IF OBJECT_ID(N'[wwi_perf].[Sale_Hash_Projection]', N'U') IS NOT NULL 4 | DROP TABLE [wwi_perf].[Sale_Hash_Projection] 5 | 6 | 7 | CREATE TABLE [wwi_perf].[Sale_Hash_Projection] 8 | WITH 9 | ( 10 | DISTRIBUTION = HASH ( [CustomerId] ), 11 | HEAP 12 | ) 13 | AS 14 | SELECT 15 | [CustomerId] 16 | ,[ProductId] 17 | ,[Quantity] 18 | FROM 19 | [wwi_perf].[Sale_Heap] 20 | OPTION (LABEL = 'CTAS : Sale_Hash_Projection') -------------------------------------------------------------------------------- /Synapse/setup/sql/13-create-wwi-perf-sale-hash-projection2.sql: -------------------------------------------------------------------------------- 1 | EXECUTE AS USER = 'asa.sql.highperf' 2 | 3 | IF OBJECT_ID(N'[wwi_perf].[Sale_Hash_Projection2]', N'U') IS NOT NULL 4 | DROP TABLE [wwi_perf].[Sale_Hash_Projection2] 5 | 6 | CREATE TABLE [wwi_perf].[Sale_Hash_Projection2] 7 | WITH 8 | ( 9 | DISTRIBUTION = HASH ( [CustomerId] ), 10 | CLUSTERED COLUMNSTORE INDEX 11 | ) 12 | AS 13 | SELECT 14 | [CustomerId] 15 | ,[ProductId] 16 | ,[Quantity] 17 | FROM 18 | [wwi_perf].[Sale_Heap] 19 | OPTION (LABEL = 'CTAS : Sale_Hash_Projection2') -------------------------------------------------------------------------------- /Synapse/setup/sql/14-create-wwi-perf-sale-hash-projection-big.sql: -------------------------------------------------------------------------------- 1 | EXECUTE AS USER = 'asa.sql.highperf' 2 | 3 | IF OBJECT_ID(N'[wwi_perf].[Sale_Hash_Projection_Big]', N'U') IS NOT NULL 4 | DROP TABLE [wwi_perf].[Sale_Hash_Projection_Big] 5 | 6 | CREATE TABLE [wwi_perf].[Sale_Hash_Projection_Big] 7 | WITH 8 | ( 9 | DISTRIBUTION = HASH ( [CustomerId] ), 10 | HEAP 11 | ) 12 | AS 13 | SELECT 14 | [CustomerId] 15 | ,CAST([ProductId] as bigint) as [ProductId] 16 | ,CAST([Quantity] as bigint) as [Quantity] 17 | FROM 18 | [wwi_perf].[Sale_Heap] 19 | OPTION (LABEL = 'CTAS : Sale_Hash_Projection_Big') -------------------------------------------------------------------------------- /Synapse/setup/sql/15-create-wwi-perf-sale-hash-projection-big2.sql: -------------------------------------------------------------------------------- 1 | EXECUTE AS USER = 'asa.sql.highperf' 2 | 3 | IF OBJECT_ID(N'[wwi_perf].[Sale_Hash_Projection_Big2]', N'U') IS NOT NULL 4 | DROP TABLE [wwi_perf].[Sale_Hash_Projection_Big2] 5 | 6 | CREATE TABLE [wwi_perf].[Sale_Hash_Projection_Big2] 7 | WITH 8 | ( 9 | DISTRIBUTION = HASH ( [CustomerId] ), 10 | CLUSTERED COLUMNSTORE INDEX 11 | ) 12 | AS 13 | SELECT 14 | [CustomerId] 15 | ,CAST([ProductId] as bigint) as [ProductId] 16 | ,CAST([Quantity] as bigint) as [Quantity] 17 | FROM 18 | [wwi_perf].[Sale_Heap] 19 | OPTION (LABEL = 'CTAS : Sale_Hash_Projection_Big2') -------------------------------------------------------------------------------- /Synapse/setup/sql/16-create-poc-schema.sql: -------------------------------------------------------------------------------- 1 | IF NOT EXISTS (SELECT * FROM sys.schemas WHERE name = 'wwi_poc') 2 | BEGIN 3 | EXEC('CREATE SCHEMA wwi_poc') 4 | END 5 | 6 | IF OBJECT_ID(N'[wwi_poc].[Customer]', N'U') IS NOT NULL 7 | DROP TABLE [wwi_poc].[Customer] 8 | 9 | CREATE TABLE wwi_poc.Customer 10 | ( 11 | CustomerId INT NOT NULL 12 | ,FirstName NVARCHAR(1000) NOT NULL 13 | ,MiddleInitial NVARCHAR(10) NULL 14 | ,LastName NVARCHAR(1000) NOT NULL 15 | ,FullName NVARCHAR(2010) NOT NULL 16 | ,Gender NVARCHAR(100) NULL 17 | ,Age INT NULL 18 | ,BirthDate DATE NULL 19 | ,Address_PostalCode NVARCHAR(200) NULL 20 | ,Address_Street NVARCHAR(2000) NULL 21 | ,Address_City NVARCHAR(2000) NULL 22 | ,Address_Country NVARCHAR(2000) NULL 23 | ,Mobile NVARCHAR(500) NULL 24 | ,Email NVARCHAR(500) NULL 25 | ) 26 | WITH 27 | ( 28 | DISTRIBUTION = ROUND_ROBIN, 29 | HEAP 30 | ) 31 | 32 | IF OBJECT_ID(N'[wwi_poc].[Date]', N'U') IS NOT NULL 33 | DROP TABLE [wwi_poc].[Date] 34 | 35 | CREATE TABLE [wwi_poc].[Date] 36 | WITH 37 | ( 38 | DISTRIBUTION = ROUND_ROBIN, 39 | HEAP 40 | ) 41 | AS 42 | SELECT 43 | * 44 | FROM 45 | [wwi].[Date] 46 | 47 | 48 | 49 | IF OBJECT_ID(N'[wwi_poc].[Product]', N'U') IS NOT NULL 50 | DROP TABLE [wwi_poc].[Product] 51 | 52 | CREATE TABLE [wwi_poc].[Product] 53 | WITH 54 | ( 55 | DISTRIBUTION = ROUND_ROBIN, 56 | HEAP 57 | ) 58 | AS 59 | SELECT 60 | * 61 | FROM 62 | [wwi].[Product] -------------------------------------------------------------------------------- /Synapse/setup/sql/17-create-wwi-poc-sale-heap.sql: -------------------------------------------------------------------------------- 1 | EXECUTE AS USER = 'asa.sql.highperf' 2 | 3 | IF OBJECT_ID(N'[wwi_poc].[Sale]', N'U') IS NOT NULL 4 | DROP TABLE [wwi_poc].[Sale] 5 | 6 | CREATE TABLE [wwi_poc].[Sale] 7 | WITH 8 | ( 9 | DISTRIBUTION = ROUND_ROBIN, 10 | HEAP 11 | ) 12 | AS 13 | SELECT 14 | * 15 | FROM 16 | [wwi].[SaleSmall] 17 | WHERE 18 | TransactionDateId < 20170501 19 | AND TransactionDateId >= 20140101 20 | OPTION (LABEL = 'CTAS : wwi_poc.Sale') 21 | -------------------------------------------------------------------------------- /Synapse/setup/sql/18-get-sql-pool-artifacts.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | S.name + '.' + T.name as ObjectName 3 | ,'table' as ObjectType 4 | FROM 5 | sys.tables T 6 | join sys.schemas S on 7 | T.schema_id = S.schema_id 8 | UNION ALL 9 | SELECT 10 | name as ObjectName 11 | ,'schema' as ObjectType 12 | FROM 13 | sys.schemas 14 | UNION ALL 15 | SELECT 16 | name as ObjectName 17 | ,'externaldatasource' as ObjectType 18 | FROM 19 | sys.external_data_sources 20 | UNION ALL 21 | SELECT 22 | name as ObjectName 23 | ,'externalfileformat' as ObjectType 24 | FROM 25 | sys.external_file_formats 26 | UNION ALL 27 | SELECT 28 | name as ObjectName 29 | ,'workloadgroup' as ObjectType 30 | FROM 31 | sys.workload_management_workload_groups 32 | UNION ALL 33 | SELECT 34 | name as ObjectName 35 | ,'workloadclassifier' as ObjectType 36 | FROM 37 | sys.workload_management_workload_classifiers 38 | UNION ALL 39 | SELECT 40 | name as ObjectName 41 | ,'view' as ObjectType 42 | FROM 43 | sys.views 44 | UNION ALL 45 | SELECT 46 | name as ObjectName 47 | ,'statistic' as ObjectType 48 | FROM 49 | sys.stats 50 | UNION ALL 51 | SELECT 52 | name as ObjectName 53 | ,'index' as ObjectType 54 | FROM 55 | sys.indexes 56 | WHERE 57 | [name] is not NULL 58 | UNION ALL 59 | SELECT 60 | name as ObjectName 61 | ,'function' as ObjectType 62 | FROM 63 | sys.sql_modules M 64 | join sys.objects O ON 65 | M.object_id = O.object_id 66 | WHERE 67 | O.type = 'IF' 68 | UNION ALL 69 | SELECT 70 | name as ObjectName 71 | ,'securitypolicy' as ObjectType 72 | FROM 73 | sys.security_policies 74 | UNION ALL 75 | select 76 | S.name + '.' + T.name + '.' + MC.name as ObjectName 77 | ,'maskedcolumn' as ObjectType 78 | from 79 | sys.masked_columns MC 80 | join sys.tables T ON 81 | MC.object_id = T.object_id 82 | join sys.schemas S ON 83 | T.schema_id = S.schema_id 84 | WHERE 85 | MC.is_masked = 1 86 | -------------------------------------------------------------------------------- /Synapse/setup/sql/19-cleanup-sql-pool.sql: -------------------------------------------------------------------------------- 1 | -- DROP VIEWS 2 | 3 | IF OBJECT_ID(N'[wwi_perf].[mvCustomerSales]') IS NOT NULL 4 | drop view [wwi_perf].[mvCustomerSales] 5 | 6 | IF OBJECT_ID(N'[wwi_perf].[vTableSizes]') IS NOT NULL 7 | drop view [wwi_perf].[vTableSizes] 8 | 9 | IF OBJECT_ID(N'[wwi_perf].[vColumnStoreRowGroupStats]') IS NOT NULL 10 | drop view [wwi_perf].[vColumnStoreRowGroupStats] 11 | 12 | IF OBJECT_ID(N'[dbo].[mvTransactionItemsCounts]') IS NOT NULL 13 | drop view dbo.mvTransactionItemsCounts 14 | 15 | -- DROP STATISTICS 16 | IF EXISTS(select * from sys.stats where name = 'Sale_Hash_CustomerId') 17 | DROP STATISTICS wwi_perf.Sale_Hash.Sale_Hash_CustomerId 18 | 19 | -- DROP INDEXES 20 | 21 | IF EXISTS(select * from sys.indexes where name = 'Store_Index') 22 | drop index Store_Index on [wwi_perf].[Sale_Index] 23 | 24 | -- DROP TABLES 25 | 26 | IF OBJECT_ID(N'[wwi_staging].[SaleHeap]') IS NOT NULL 27 | DROP TABLE [wwi_staging].[SaleHeap] 28 | 29 | IF OBJECT_ID(N'[wwi_staging].[Sale]') IS NOT NULL 30 | drop table wwi_staging.Sale 31 | 32 | IF OBJECT_ID(N'[wwi_staging].[DailySalesCounts]') IS NOT NULL 33 | drop table wwi_staging.DailySalesCounts 34 | 35 | IF OBJECT_ID(N'[wwi].[CampaignAnalytics]') IS NOT NULL 36 | drop table wwi.CampaignAnalytics 37 | 38 | IF OBJECT_ID(N'[wwi].[UserProductReviews]') IS NOT NULL 39 | drop table wwi.UserProductReviews 40 | 41 | IF OBJECT_ID(N'[wwi].[UserTopProductPurchases]') IS NOT NULL 42 | drop table wwi.UserTopProductPurchases 43 | 44 | IF OBJECT_ID(N'[wwi_perf].[Sale_Hash]') IS NOT NULL 45 | drop table [wwi_perf].[Sale_Hash] 46 | 47 | IF OBJECT_ID(N'[wwi_perf].[Sale_Hash_Projection]') IS NOT NULL 48 | drop table [wwi_perf].[Sale_Hash_Projection] 49 | 50 | IF OBJECT_ID(N'[wwi_perf].[Sale_Hash_Projection2]') IS NOT NULL 51 | drop table [wwi_perf].[Sale_Hash_Projection2] 52 | 53 | IF OBJECT_ID(N'[wwi_perf].[Sale_Hash_Projection_Big]') IS NOT NULL 54 | drop table [wwi_perf].[Sale_Hash_Projection_Big] 55 | 56 | IF OBJECT_ID(N'[wwi_perf].[Sale_Hash_Projection_Big2]') IS NOT NULL 57 | drop table [wwi_perf].[Sale_Hash_Projection_Big2] 58 | 59 | IF OBJECT_ID(N'[wwi_perf].[Sale_Hash_v2]') IS NOT NULL 60 | drop table [wwi_perf].[Sale_Hash_v2] 61 | 62 | IF OBJECT_ID(N'[wwi_ml].[ProductPCA]') IS NOT NULL 63 | drop table wwi_ml.ProductPCA 64 | 65 | IF OBJECT_ID(N'[wwi].[Recommendations]') IS NOT NULL 66 | drop table wwi.Recommendations 67 | 68 | IF OBJECT_ID(N'[wwi_external].[DailySalesCounts]') IS NOT NULL 69 | drop external table [wwi_external].DailySalesCounts 70 | 71 | IF OBJECT_ID(N'[wwi_external].[Sales]') IS NOT NULL 72 | drop external table [wwi_external].Sales 73 | 74 | IF OBJECT_ID(N'[external].[DailySalesCounts]') IS NOT NULL 75 | drop external table [external].DailySalesCounts 76 | 77 | IF OBJECT_ID(N'[external].[Sales]') IS NOT NULL 78 | drop external table [external].Sales 79 | 80 | IF OBJECT_ID(N'[wwi_ml].[MLModel]') IS NOT NULL 81 | truncate table [wwi_ml].[MLModel] 82 | 83 | -- DROP SCHEMAS 84 | 85 | IF EXISTS (select * from sys.schemas where name = 'wwi_external') 86 | DROP SCHEMA [wwi_external] 87 | 88 | IF EXISTS (select * from sys.schemas where name = 'external') 89 | DROP SCHEMA [external] 90 | 91 | IF EXISTS (select * from sys.schemas where name = 'wwi_staging') 92 | DROP SCHEMA [wwi_staging] 93 | 94 | -- DROP EXTERNAL FILE FORMATS 95 | 96 | IF EXISTS(select * from sys.external_file_formats where name = 'csv_dailysales') 97 | drop external file format csv_dailysales 98 | 99 | IF EXISTS(select * from sys.external_file_formats where name = 'ParquetFormat') 100 | drop external file format ParquetFormat 101 | 102 | -- DROP EXTERNAL DATA SOURCES 103 | 104 | IF EXISTS(select * from sys.external_data_sources where name = 'ABSS') 105 | drop external data source ABSS 106 | 107 | -- DROP WORKLOAD CLASSIFIERS 108 | 109 | IF EXISTS (SELECT * FROM sys.workload_management_workload_classifiers WHERE name = 'CEO') 110 | DROP WORKLOAD CLASSIFIER CEO; 111 | 112 | IF EXISTS (SELECT * FROM sys.workload_management_workload_classifiers where name = 'CEODreamDemo') 113 | DROP WORKLOAD CLASSIFIER CEODreamDemo 114 | 115 | IF EXISTS (SELECT * FROM sys.workload_management_workload_classifiers where name = 'HeavyLoader') 116 | DROP WORKLOAD Classifier HeavyLoader 117 | 118 | -- DROP WORKLOAD GROUPS 119 | 120 | IF EXISTS (SELECT * FROM sys.workload_management_workload_groups where name = 'BigDataLoad') 121 | DROP WORKLOAD GROUP BigDataLoad 122 | 123 | IF EXISTS (SELECT * FROM sys.workload_management_workload_groups where name = 'CEODemo') 124 | DROP WORKLOAD GROUP CEODemo 125 | 126 | -- DROP SECURITY POLICIES 127 | 128 | IF EXISTS (select * from sys.security_policies where name = 'SalesFilter') 129 | BEGIN 130 | ALTER SECURITY POLICY SalesFilter 131 | WITH (STATE = OFF); 132 | 133 | DROP SECURITY POLICY SalesFilter; 134 | END 135 | 136 | -- DROP FUNCTIONS 137 | 138 | IF EXISTS (select * from sys.sql_modules M join sys.objects O on M.object_id = O.object_id where name = 'fn_securitypredicate') 139 | DROP FUNCTION wwi_security.fn_securitypredicate 140 | 141 | -- DROP MASKED COLUMNS 142 | 143 | IF EXISTS ( 144 | select 145 | * 146 | from 147 | sys.masked_columns MC 148 | join sys.tables T ON 149 | MC.object_id = T.object_id 150 | join sys.schemas S ON 151 | T.schema_id = S.schema_id 152 | WHERE 153 | MC.is_masked = 1 154 | AND S.name = 'wwi_security' 155 | AND T.name = 'CustomerInfo' 156 | AND MC.name = 'CreditCard' 157 | ) 158 | BEGIN 159 | ALTER TABLE wwi_security.CustomerInfo 160 | ALTER COLUMN [CreditCard] DROP MASKED; 161 | END 162 | 163 | IF EXISTS ( 164 | select 165 | * 166 | from 167 | sys.masked_columns MC 168 | join sys.tables T ON 169 | MC.object_id = T.object_id 170 | join sys.schemas S ON 171 | T.schema_id = S.schema_id 172 | WHERE 173 | MC.is_masked = 1 174 | AND S.name = 'wwi_security' 175 | AND T.name = 'CustomerInfo' 176 | AND MC.name = 'Email' 177 | ) 178 | BEGIN 179 | ALTER TABLE wwi_security.CustomerInfo 180 | ALTER COLUMN [Email] DROP MASKED; 181 | END -------------------------------------------------------------------------------- /Synapse/setup/sql/_test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/CSU-AzureLabs/b7efb31fdd47308a72f7007266031683be77efe1/Synapse/setup/sql/_test.json -------------------------------------------------------------------------------- /Synapse/setup/templates/blob_storage_linked_service.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "#LINKED_SERVICE_NAME#", 3 | "type": "Microsoft.Synapse/workspaces/linkedservices", 4 | "properties": { 5 | "annotations": [], 6 | "type": "AzureBlobStorage", 7 | "typeProperties": { 8 | "connectionString": "DefaultEndpointsProtocol=https;AccountName=#STORAGE_ACCOUNT_NAME#;AccountKey=#STORAGE_ACCOUNT_KEY#;EndpointSuffix=core.windows.net;" 9 | } 10 | } 11 | } -------------------------------------------------------------------------------- /Synapse/setup/templates/cosmos_db_linked_service.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "#LINKED_SERVICE_NAME#", 3 | "properties": { 4 | "annotations": [], 5 | "type": "CosmosDb", 6 | "typeProperties": { 7 | "connectionString": "AccountEndpoint=https://#COSMOSDB_ACCOUNT_NAME#.documents.azure.com:443/;Database=#COSMOSDB_DATABASE_NAME#;AccountKey=\"#COSMOSDB_ACCOUNT_KEY#\"" 8 | } 9 | } 10 | } -------------------------------------------------------------------------------- /Synapse/setup/templates/data_lake_linked_service.json: -------------------------------------------------------------------------------- 1 | { 2 | "name":"#LINKED_SERVICE_NAME#", 3 | "properties": { 4 | "annotations": [], 5 | "type":"AzureBlobFS", 6 | "typeProperties": { 7 | "url":"https://#STORAGE_ACCOUNT_NAME#.dfs.core.windows.net", 8 | "accountKey": { 9 | "type": "SecureString", 10 | "value":"#STORAGE_ACCOUNT_KEY#" 11 | } 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /Synapse/setup/templates/integration_runtime.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "#INTEGRATION_RUNTIME_NAME#", 3 | "properties": { 4 | "type": "Managed", 5 | "typeProperties": { 6 | "computeProperties": { 7 | "location": "AutoResolve", 8 | "dataFlowProperties": { 9 | "computeType": "MemoryOptimized", 10 | "coreCount": #CORE_COUNT#, 11 | "timeToLive": #TIME_TO_LIVE# 12 | } 13 | } 14 | } 15 | } 16 | } -------------------------------------------------------------------------------- /Synapse/setup/templates/key_vault_linked_service.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "#LINKED_SERVICE_NAME#", 3 | "type": "Microsoft.Synapse/workspaces/linkedservices", 4 | "properties": { 5 | "annotations": [], 6 | "type": "AzureKeyVault", 7 | "typeProperties": { 8 | "baseUrl": "https://#KEY_VAULT_NAME#.vault.azure.net/" 9 | } 10 | } 11 | } -------------------------------------------------------------------------------- /Synapse/setup/templates/spark_notebook.json: -------------------------------------------------------------------------------- 1 | { 2 | "name":"#NOTEBOOK_NAME#", 3 | "properties":{ 4 | "nbformat":4, 5 | "nbformat_minor":2, 6 | "bigDataPool":{ 7 | "referenceName":"#SPARK_POOL_NAME#", 8 | "type":"BigDataPoolReference" 9 | }, 10 | "sessionProperties":{ 11 | "driverMemory":"28g", 12 | "driverCores":4, 13 | "executorMemory":"28g", 14 | "executorCores":4, 15 | "numExecutors":2 16 | }, 17 | "metadata":{ 18 | "language_info":{ 19 | "name":"python" 20 | }, 21 | "a365ComputeOptions":{ 22 | "id":"/subscriptions/#SUBSCRIPTION_ID#/resourceGroups/#RESOURCE_GROUP_NAME#/providers/Microsoft.Synapse/workspaces/#WORKSPACE_NAME#/bigDataPools/#SPARK_POOL_NAME#", 23 | "name":"#SPARK_POOL_NAME#", 24 | "type":"Spark", 25 | "endpoint":"https://#WORKSPACE_NAME#.dev.azuresynapse.net/livyApi/versions/2019-11-01-preview/sparkPools/#SPARK_POOL_NAME#", 26 | "auth":{ 27 | "type":"AAD", 28 | "authResource":"https://dev.azuresynapse.net" 29 | }, 30 | "sparkVersion":"2.4", 31 | "nodeCount":3, 32 | "cores":4, 33 | "memory":28, 34 | "extraHeader":{ 35 | 36 | } 37 | }, 38 | "sessionKeepAliveTimeout":60 39 | }, 40 | "cells":[ 41 | ] 42 | } 43 | } -------------------------------------------------------------------------------- /Synapse/setup/templates/spark_notebook_session.json: -------------------------------------------------------------------------------- 1 | { 2 | "kind":"pyspark", 3 | "name":"#SPARK_SESSION_NAME#", 4 | "driverMemory":"56g", 5 | "driverCores":8, 6 | "executorMemory":"56g", 7 | "executorCores":8, 8 | "numExecutors":4 9 | } -------------------------------------------------------------------------------- /Synapse/setup/templates/sql_pool_key_vault_linked_service.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "#LINKED_SERVICE_NAME#", 3 | "properties": { 4 | "annotations": [], 5 | "type": "AzureSqlDW", 6 | "typeProperties": { 7 | "connectionString": "Integrated Security=False;Encrypt=True;Connection Timeout=30;Data Source=#WORKSPACE_NAME#.sql.azuresynapse.net;Initial Catalog=#DATABASE_NAME#;User ID=#USER_NAME#", 8 | "password": { 9 | "type": "AzureKeyVaultSecret", 10 | "store": { 11 | "referenceName": "#KEY_VAULT_LINKED_SERVICE_NAME#", 12 | "type": "LinkedServiceReference" 13 | }, 14 | "secretName": "#SECRET_NAME#" 15 | } 16 | } 17 | } 18 | } -------------------------------------------------------------------------------- /Synapse/setup/templates/sql_pool_script.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "#SQL_SCRIPT_NAME#", 3 | "properties": { 4 | "content": { 5 | "query": "#SQL_SCRIPT#", 6 | "metadata": { 7 | "language": "sql" 8 | }, 9 | "currentConnection": { 10 | "name": "#SQL_POOL_NAME#", 11 | "type": "SqlPool" 12 | } 13 | }, 14 | "type": "SqlQuery" 15 | } 16 | } -------------------------------------------------------------------------------- /Synapse/setup/templates/sql_script.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "#SQL_SCRIPT_NAME#", 3 | "properties": { 4 | "content": { 5 | "query": "", 6 | "metadata": { 7 | "language":"sql" 8 | }, 9 | "currentConnection": { 10 | "name": "SQLPool01", 11 | "type":"SqlPool" 12 | } 13 | }, 14 | "type": "SqlQuery" 15 | } 16 | } -------------------------------------------------------------------------------- /readme: -------------------------------------------------------------------------------- 1 | Testing the create file in new repo 2 | --------------------------------------------------------------------------------