├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │   ├── bug_report.md │   └── feature_request.md └── pull_request_template.md ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── Makefile ├── README.md ├── circle.yml ├── dbt_project.yml ├── etc └── dbt-logo.png ├── macros ├── compression.sql ├── ddl.sql ├── introspection.sql ├── queries.sql ├── redshift_maintenance_operation.sql ├── try_cast.sql ├── unload.sql └── utilities.sql ├── models ├── base │   ├── pg_attribute.sql │   ├── pg_class.sql │   ├── pg_depend.sql │   ├── pg_namespace.sql │   ├── pg_tables.sql │   ├── pg_user.sql │   ├── pg_views.sql │   ├── redshift_cost.sql │   ├── stl_explain.sql │   ├── stl_query.sql │   ├── stl_wlm_query.sql │   ├── stv_blocklist.sql │   ├── stv_partitions.sql │   ├── stv_tbl_perm.sql │   └── svv_diskusage.sql ├── introspection │   ├── redshift_columns.sql │   ├── redshift_constraints.sql │   ├── redshift_sort_dist_keys.sql │   └── redshift_tables.sql └── views │   ├── redshift_admin_dependencies.sql │   ├── redshift_admin_queries.sql │   ├── redshift_admin_table_stats.sql │   ├── redshift_admin_users_schema_privileges.sql │   └── redshift_admin_users_table_view_privileges.sql └── packages.yml /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @clrcrl -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Report a bug or an issue you've found with this package 4 | title: '' 5 | labels: bug, triage 6 | assignees: '' 7 | 8 | --- 9 | 10 | ### Describe the bug 11 | <!--- 12 | A clear and concise description of what the bug is and what you're seeing. 13 | ---> 14 | 15 | ### Steps to reproduce 16 | <!--- 17 | In as much detail as possible, please provide steps to reproduce the issue. 18 | ---> 19 | 20 | ### Expected results 21 | <!--- 22 | A clear and concise description of what you expected to happen. 23 | ---> 24 | 25 | ### Actual results 26 | <!--- 27 | A clear and concise description of what you're seeing. 28 | ---> 29 | 30 | ### Screenshots and log output 31 | <!--- 32 | If applicable, add screenshots or log output to help explain your problem. 33 | ---> 34 | 35 | ### System information 36 | **The contents of your `packages.yml` file:** 37 | 38 | **Which database are you using dbt with?** 39 | - [ ] postgres 40 | - [ ] redshift 41 | - [ ] bigquery 42 | - [ ] snowflake 43 | - [ ] other (specify: ____________) 44 | 45 | 46 | **The output of `dbt --version`:** 47 | ``` 48 | 49 | ``` 50 | 51 | **The operating system you're using:** 52 | 53 | **The output of `python --version`:** 54 | 55 | ### Additional context 56 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this package 4 | title: '' 5 | labels: enhancement, triage 6 | assignees: '' 7 | 8 | --- 9 | 10 | ### Describe the feature 11 | A clear and concise description of what you want to happen. 12 | 13 | ### Describe alternatives you've considered 14 | A clear and concise description of any alternative solutions or features you've considered. 15 | 16 | ### Additional context 17 | Is this feature database-specific? Which database(s) is/are relevant? Please include any other relevant context here. 18 | 19 | ### Who will this benefit? 20 | What kind of use case will this feature be useful for? Please be specific and provide examples; this will help us prioritize properly. 
-------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ## Description & motivation 2 | <!--- 3 | Describe your changes, and why you're making them. 4 | ---> 5 | 6 | ## Checklist 7 | - [ ] I have verified that these changes work locally 8 | - [ ] I have updated the README.md (if applicable) 9 | - [ ] I have added tests & descriptions to my models (and macros if applicable) -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target/ 3 | dbt_modules/ 4 | logs/ 5 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # redshift 0.6.1 2 | ## Fixes 3 | - `redshift_maintenance()` macro now works if a custom `ref()` macro exists in the project ([#52](https://github.com/dbt-labs/redshift/issues/52), [#53](https://github.com/dbt-labs/redshift/pull/53)) ([@jeremyyeo](https://github.com/jeremyyeo)) 4 | 5 | # redshift 0.6.0 6 | 7 | This release supports any version (minor and patch) of v1, which means far less need for compatibility releases in the future. 8 | 9 | ## Under the hood 10 | - Change `require-dbt-version` to `[">=1.0.0", "<2.0.0"]` 11 | - Bump dbt-utils dependency 12 | - Replace `source-paths` with `model-paths` 13 | 14 | # redshift 0.5.1 15 | 🚨 This is a compatibility release in preparation for `dbt-core` v1.0.0 (🎉). Projects using this version with `dbt-core` v1.0.x can expect to see a deprecation warning. This will be resolved in the next minor release. 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: test 2 | 3 | test: 4 | cd package-test && \ 5 | dbt seed && \ 6 | dbt run && \ 7 | dbt test 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Redshift data models and utilities 2 | 3 | [dbt](https://www.getdbt.com) models for [Redshift](https://aws.amazon.com/redshift/) warehouses. 4 | 5 | ## Installation instructions 6 | 7 | 1. Include this package in your `packages.yml` -- check [here](https://hub.getdbt.com/dbt-labs/redshift/latest/) 8 | for installation instructions. 9 | 2. Run `dbt deps` 10 | 11 | ### Models 12 | 13 | This package provides a number of base models for Redshift system tables, as well as a few utility views that usefully combine the base models. 14 | 15 | __Base Models__ 16 | 17 | Each of these base models maps 1-to-1 with the underlying Redshift table. Some renaming has been performed to make the field names easier to grok. 18 | 19 | - pg_attribute 20 | - pg_class 21 | - pg_namespace 22 | - pg_user 23 | - [stl_explain](http://docs.aws.amazon.com/redshift/latest/dg/r_STL_EXPLAIN.html) 24 | - [stl_query](http://docs.aws.amazon.com/redshift/latest/dg/r_STL_QUERY.html) 25 | - [stl_wlm_query](http://docs.aws.amazon.com/redshift/latest/dg/r_STL_WLM_QUERY.html) 26 | - [stv_blocklist](http://docs.aws.amazon.com/redshift/latest/dg/r_STV_BLOCKLIST.html) 27 | - [stv_tbl_perm](http://docs.aws.amazon.com/redshift/latest/dg/r_STV_TBL_PERM.html) 28 | - [svv_diskusage](http://docs.aws.amazon.com/redshift/latest/dg/r_SVV_DISKUSAGE.html) 29 | 30 | __Ephemeral Models__ 31 | 32 | These ephemeral models simplify some of Redshift's field naming and logic, to make the data more usable. 33 | 34 | - redshift_cost: transforms the start and max explain cost values from stl_explain into floating point values 35 | 36 | __View Models__ 37 | 38 | These views are designed to make debugging your Redshift cluster more straightforward. They are, in effect, materializations of the [Diagnostic Queries for Query Tuning](http://docs.aws.amazon.com/redshift/latest/dg/diagnostic-queries-for-query-tuning.html) from Redshift's documentation. 39 | 40 | - [redshift_admin_queries](models/views/redshift_admin_queries.sql): Simplified view of queries, including explain cost, execution times, and queue times. 41 | - [redshift_admin_table_stats](models/views/redshift_admin_table_stats.sql): Gives insight on tables in your warehouse. Includes information on sort and dist keys, table size on disk, and more. 42 | - [redshift_admin_dependencies](models/views/redshift_admin_dependencies.sql): Simplified view of pg_depend, showing any dependent objects (views) for a given source object. 43 | 44 | These views are designed to make user privilege management more straightforward. 45 | - [redshift_admin_users_table_view_privileges](models/views/redshift_admin_users_table_view_privileges.sql): Gives insights into which [privileges](https://docs.aws.amazon.com/redshift/latest/dg/r_HAS_TABLE_PRIVILEGE.html) each user has on each table/view. 46 | - [redshift_admin_users_schema_privileges](models/views/redshift_admin_users_schema_privileges.sql): Gives insights into which [privileges](https://docs.aws.amazon.com/redshift/latest/dg/r_HAS_SCHEMA_PRIVILEGE.html) each user has on each schema. 
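Once these views are built with `dbt run`, they can be queried like any other relation. Here is a minimal sketch against `redshift_admin_table_stats`; the `analytics` schema is a placeholder for wherever your project builds these models, and the threshold is arbitrary:

```sql
-- Illustrative only: surface the most unsorted tables, using columns
-- defined in models/views/redshift_admin_table_stats.sql.
-- "analytics" is a placeholder target schema.
select "schema", "table", percent_rows_unsorted
from analytics.redshift_admin_table_stats
where percent_rows_unsorted > 10
order by percent_rows_unsorted desc;
```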
47 | 48 | __Introspection Models__ 49 | 50 | These models (default ephemeral) make it possible to inspect tables, columns, constraints, and sort/dist keys of the Redshift cluster. These models are used to build column compression queries, but may also be generally useful. 51 | 52 | - [redshift_tables](models/introspection/redshift_tables.sql) 53 | - [redshift_columns](models/introspection/redshift_columns.sql) 54 | - [redshift_constraints](models/introspection/redshift_constraints.sql) 55 | - [redshift_sort_dist_keys](models/introspection/redshift_sort_dist_keys.sql) 56 | 57 | 58 | ## Macros 59 | 60 | #### compress_table ([source](macros/compression.sql)) 61 | 62 | This macro returns the SQL required to auto-compress a table using the results of an `analyze compression` query. All comments, constraints, keys, and indexes are copied to the newly compressed table by this macro. Additionally, sort and dist keys can be provided to override the settings from the source table. By default, a backup table is made, which is _not_ deleted. To delete this backup table after a successful copy, use the `drop_backup` flag. 63 | Note that this macro rewrites the entire table. The `skip_if_incremental` flag can be used to skip this macro if it is being called in an `is_incremental()` context, e.g. when used as a post-hook on an incremental model. 64 | 65 | Macro signature: 66 | ``` 67 | {{ compress_table(schema, table, 68 | drop_backup=False, 69 | comprows=none|Integer, 70 | sort_style=none|compound|interleaved, 71 | sort_keys=none|List, 72 | dist_style=none|all|even, 73 | dist_key=none|String, 74 | skip_if_incremental=False) }} 75 | ``` 76 | 77 | Example usage: 78 | ``` 79 | {{ 80 | config({ 81 | "materialized":"table", 82 | "sort": "id", 83 | "dist": "id", 84 | "post-hook": [ 85 | "{{ redshift.compress_table(this.schema, this.table, drop_backup=False) }}" 86 | ] 87 | }) 88 | }} 89 | ``` 90 | 91 | #### unload_table ([source](macros/unload.sql)) 92 | 93 | This macro returns the SQL required to unload a Redshift table to one or more files on S3. The macro replicates all functionality provided by Redshift's [UNLOAD](http://docs.aws.amazon.com/redshift/latest/dg/r_UNLOAD.html) command. 94 | 95 | Macro signature: 96 | ``` 97 | {{ unload_table(schema, 98 | table, 99 | s3_path, 100 | iam_role=None|String, 101 | aws_key=None|String, 102 | aws_secret=None|String, 103 | aws_token=None|String, 104 | aws_region=None|String, 105 | manifest=Boolean, 106 | header=Boolean, 107 | format=None|String, 108 | delimiter=String, 109 | null_as=String, 110 | max_file_size=String, 111 | escape=Boolean, 112 | compression=None|GZIP|BZIP2|ZSTD, 113 | add_quotes=Boolean, 114 | encrypted=Boolean, 115 | overwrite=Boolean, 116 | cleanpath=Boolean, 117 | parallel=Boolean, 118 | partition_by=none|List, 119 | extension=None|String 120 | ) }} 121 | ``` 122 | 123 | Example usage: 124 | ``` 125 | {{ 126 | config({ 127 | "materialized":"table", 128 | "sort": "id", 129 | "dist": "id", 130 | "post-hook": [ 131 | "{{ redshift.unload_table(this.schema, 132 | this.table, 133 | s3_path='s3://bucket/folder', 134 | aws_key='abcdef', 135 | aws_secret='ghijklm', 136 | header=True, 137 | delimiter='|') }}" 138 | ] 139 | }) 140 | }} 141 | ``` 142 | 143 | #### redshift_maintenance_operation ([source](macros/redshift_maintenance_operation.sql)) 144 | 145 | This macro is intended to be run as an [operation](https://docs.getdbt.com/docs/using-operations). It vacuums and analyzes each table, with verbose logging. 
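Under the hood, for each table returned by its `vacuumable_tables_sql` query (described below), the operation runs statements equivalent to the following sketch; the table name is illustrative, borrowed from the sample output that follows:

```sql
-- One pair of statements per table, run sequentially:
vacuum "analytics"."customer_orders";
analyze "analytics"."customer_orders";
```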
146 | 147 | The user who runs this operation must be a super user. 148 | ``` 149 | $ dbt run-operation redshift_maintenance 150 | Running with dbt=0.14.2 151 | 06:35:33 + 1 of 478 Vacuuming "analytics"."customer_orders" 152 | 06:35:33 + 1 of 478 Analyzing "analytics"."customer_orders" 153 | 06:35:33 + 1 of 478 Finished "analytics"."customer_orders" in 0.29s 154 | 06:35:33 + 2 of 478 Vacuuming "analytics"."customer_payments" 155 | 06:35:33 + 2 of 478 Analyzing "analytics"."customer_payments" 156 | 06:35:33 + 2 of 478 Finished "analytics"."customer_payments" in 0.28s 157 | ``` 158 | 159 | The command can also be run with optional parameters to exclude schemas, matching them either exactly or with SQL `like` patterns. This can be useful for cases where the models (or the schemas themselves) tend to be short-lived and don't require vacuuming. For example: 160 | ``` 161 | $ dbt run-operation redshift_maintenance --args '{exclude_schemas: ["looker_scratch"], exclude_schemas_like: ["sinter\\_pr\\_%"]}' 162 | ``` 163 | You can also implement your own query to choose which tables to vacuum. To do so, 164 | create a macro in your **own project** named `vacuumable_tables_sql`, following 165 | the same pattern as the macro in [this package](macros/redshift_maintenance_operation.sql). 166 | Here's an example: 167 | ```sql 168 | -- my_project/macros/redshift_maintenance_operation.sql 169 | {% macro vacuumable_tables_sql() %} 170 | {%- set limit=kwargs.get('limit') -%} 171 | select 172 | current_database() as table_database, 173 | table_schema, 174 | table_name 175 | from information_schema.tables 176 | where table_type = 'BASE TABLE' 177 | 178 | order by table_schema, table_name 179 | {% if limit %} 180 | limit {{ limit }} 181 | {% endif %} 182 | {% endmacro %} 183 | ``` 184 | When you run the `redshift_maintenance` macro, your version of `vacuumable_tables_sql` 185 | will be respected. You can also add arguments to your version of `vacuumable_tables_sql` 186 | by following the pattern in the `vacuumable_tables_sql` macro in this package. For example, the macro above accepts a `limit` keyword argument, which you could pass with `dbt run-operation redshift_maintenance --args '{limit: 10}'`. 187 | 188 | Note: This macro will skip any relations that are dropped between running 189 | the initial query and the point at which it tries to vacuum them. This results in 190 | a message like so: 191 | ``` 192 | 13:18:22 + 1 of 157 Skipping relation "analytics"."dbt_claire"."amazon_orders" as it does not exist 193 | ``` 194 | 195 | ### Contributing 196 | Additional contributions to this repo are very welcome! Check out [this post](https://discourse.getdbt.com/t/contributing-to-a-dbt-package/657) on the best workflow for contributing to a package. All PRs should only include functionality that is available in all Redshift deployments; no implementation-specific details should be included. 
197 | -------------------------------------------------------------------------------- /circle.yml: -------------------------------------------------------------------------------- 1 | machine: 2 | timezone: 3 | America/New_York 4 | 5 | python: 6 | version: 7 | 3.9.9 8 | 9 | dependencies: 10 | pre: 11 | - pip install dbt 12 | 13 | test: 14 | override: 15 | - dbt seed 16 | - dbt run 17 | - dbt test 18 | -------------------------------------------------------------------------------- /dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'redshift' 2 | version: '0.5.0' 3 | config-version: 2 4 | 5 | require-dbt-version: [">=1.0.0", "<2.0.0"] 6 | 7 | model-paths: ["models"] 8 | macro-paths: ["macros"] 9 | test-paths: ["tests"] 10 | 11 | profile: redshift_package 12 | 13 | models: 14 | redshift: 15 | base: 16 | +materialized: ephemeral 17 | introspection: 18 | +materialized: ephemeral 19 | views: 20 | +materialized: view 21 | -------------------------------------------------------------------------------- /etc/dbt-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dbt-labs/redshift/ad9e686eefe8b15ed47c134799c695ae58767672/etc/dbt-logo.png -------------------------------------------------------------------------------- /macros/compression.sql: -------------------------------------------------------------------------------- 1 | {% macro find_analyze_recommendations(schema, table, comprows=none) %} 2 | 3 | {% set comprows_s = '' if comprows is none else 'comprows ' ~ comprows %} 4 | {% set query %} 5 | analyze compression "{{ schema }}"."{{ table }}" {{ comprows_s }} 6 | {% endset %} 7 | 8 | {% set columns = redshift.get_data(query, ['table', 'column', 'encoding', 'reduction_pct']) %} 9 | 10 | {% set ret = {} %} 11 | {% for column in columns %} 12 | {%- set _ = ret.update({column.column: column}) -%} 13 | {% endfor %} 14 | 15 | {{ return(ret) }} 16 | 17 | {% endmacro %} 18 | 19 | {% macro build_optimized_definition(definition, recommendation) -%} 20 | 21 | {% set optimized = {} %} 22 | {% set _ = optimized.update(definition) %} 23 | {% for name, column in definition['columns'].items() %} 24 | {% set recommended_encoding = recommendation[name] %} 25 | 26 | {% if recommended_encoding['encoding'] != column['encoding'] %} 27 | {{ log(" Changing " ~ name ~ ": " ~ column['encoding'] ~ " -> " ~ recommended_encoding['encoding'] ~ " (" ~ recommended_encoding['reduction_pct'] ~ "%)") }} 28 | {% else %} 29 | {{ log("Not Changing " ~ name ~ ": " ~ column['encoding']) }} 30 | {% endif %} 31 | 32 | {% set _ = optimized['columns'][name].update({"encoding": recommended_encoding['encoding']}) %} 33 | {% endfor %} 34 | 35 | {{ return(optimized) }} 36 | 37 | {%- endmacro %} 38 | 39 | {%- macro insert_into_sql(from_schema, from_table, to_schema, to_table) -%} 40 | 41 | insert into "{{ to_schema }}"."{{ to_table }}" ( 42 | select * from "{{ from_schema }}"."{{ from_table }}" 43 | ); 44 | 45 | {%- endmacro -%} 46 | 47 | {%- macro atomic_swap_sql(schema, from_table, to_table, drop_backup) -%} 48 | 49 | begin; 50 | -- drop table if exists "{{ schema }}"."{{ from_table }}__backup" cascade; 51 | alter table "{{ schema }}"."{{ from_table }}" rename to "{{ from_table }}__backup"; 52 | alter table "{{ schema }}"."{{ to_table }}" rename to "{{ from_table }}"; 53 | {% if drop_backup %} 54 | drop table "{{ schema }}"."{{ from_table }}__backup" cascade; 55 | {% else %} 56 | {{ log('drop_backup is False 
-- not dropping ' ~ from_table ~ "__backup") }} 57 | {% endif %} 58 | commit; 59 | 60 | {%- endmacro -%} 61 | 62 | {%- macro compress_table(schema, table, drop_backup=False, 63 | comprows=none, sort_style=none, sort_keys=none, 64 | dist_style=none, dist_key=none, skip_if_incremental=False) -%} 65 | 66 | {% if not execute %} 67 | {{ return(none) }} 68 | {% endif %} 69 | 70 | {% if skip_if_incremental and is_incremental() %} 71 | {{ return('') }} 72 | {% endif %} 73 | 74 | {% set recommendation = redshift.find_analyze_recommendations(schema, table, comprows) %} 75 | {% set definition = redshift.fetch_table_definition(schema, table) %} 76 | 77 | {% if definition is none %} 78 | {{ return(none) }} 79 | {% endif %} 80 | 81 | {% set optimized = redshift.build_optimized_definition(definition, recommendation) %} 82 | 83 | {% set _ = optimized.update({"keys": optimized.get('keys', {}) | default({})}) %} 84 | {% if sort_style %} {% set _ = optimized['keys'].update({"sort_style": sort_style}) %} {% endif %} 85 | {% if sort_keys %} {% set _ = optimized['keys'].update({"sort_keys": sort_keys}) %} {% endif %} 86 | {% if dist_style %} {% set _ = optimized['keys'].update({"dist_style": dist_style}) %} {% endif %} 87 | {% if dist_key %} {% set _ = optimized['keys'].update({"dist_key": dist_key}) %} {% endif %} 88 | 89 | {% set new_table = table ~ "__compressed" %} 90 | {% set _ = optimized.update({'name': new_table}) %} 91 | 92 | {# Build the DDL #} 93 | {{ redshift.build_ddl_sql(optimized) }} 94 | {{ redshift.insert_into_sql(schema, table, schema, new_table) }} 95 | {{ redshift.atomic_swap_sql(schema, table, new_table, drop_backup) }} 96 | 97 | {%- endmacro %} 98 | -------------------------------------------------------------------------------- /macros/ddl.sql: -------------------------------------------------------------------------------- 1 | 2 | {# 3 | CREATE [ [LOCAL ] { TEMPORARY | TEMP } ] TABLE 4 | [ IF NOT EXISTS ] table_name 5 | ( { column_name data_type [column_attributes] [ column_constraints ] 6 | | table_constraints 7 | | LIKE parent_table [ { INCLUDING | EXCLUDING } DEFAULTS ] } 8 | [, ... ] ) 9 | [ BACKUP { YES | NO } ] 10 | [table_attribute] 11 | 12 | where column_attributes are: 13 | [ DEFAULT default_expr ] 14 | [ IDENTITY ( seed, step ) ] 15 | [ ENCODE encoding ] 16 | [ DISTKEY ] 17 | [ SORTKEY ] 18 | 19 | and column_constraints are: 20 | [ { NOT NULL | NULL } ] 21 | [ { UNIQUE | PRIMARY KEY } ] 22 | [ REFERENCES reftable [ ( refcolumn ) ] ] 23 | 24 | and table_constraints are: 25 | [ UNIQUE ( column_name [, ... ] ) ] 26 | [ PRIMARY KEY ( column_name [, ... ] ) ] 27 | [ FOREIGN KEY (column_name [, ... ] ) REFERENCES reftable [ ( refcolumn ) ] 28 | 29 | and table_attributes are: 30 | [ DISTSTYLE { EVEN | KEY | ALL } ] 31 | [ DISTKEY ( column_name ) ] 32 | [ [COMPOUND | INTERLEAVED ] SORTKEY ( column_name [, ...] 
) ] 33 | #} 34 | 35 | {% macro build_ddl_sql(def) %} 36 | 37 | -- DROP 38 | drop table if exists "{{ def['schema'] }}"."{{ def['name'] }}"; 39 | -- CREATE 40 | create table "{{ def['schema'] }}"."{{ def['name'] }}" ( 41 | -- COLUMNS 42 | {% for column in def['columns'].values() | sort(attribute='position') -%} 43 | "{{ column['name'] }}" {{ column['type'] }} 44 | {%- if column['encoding'] is not none %} encode {{ column['encoding'] }} {% endif -%} 45 | {%- if column['default'] is not none %} default {{ column['default'] }} {% endif -%} 46 | {%- if column['not_null'] %} not null {% endif -%} 47 | {%- if not loop.last %}, {% endif %} 48 | {% endfor %} 49 | 50 | -- CONSTRAINTS 51 | {% for constraint in def['constraints'] -%} 52 | , {{ constraint['col_constraint'] }} 53 | {% endfor %} 54 | ) 55 | 56 | --KEYS 57 | {% if def['keys'] %} 58 | {% set dist_style = def['keys']['dist_style'] %} 59 | {% set dist_key = def['keys']['dist_key'] %} 60 | -- DIST 61 | {% if dist_style %} diststyle {{ dist_style }} {% endif %} 62 | {% if dist_key %} distkey("{{ dist_key }}") {% endif %} 63 | 64 | -- SORT 65 | {% set sort_style = def['keys']['sort_style'] %} 66 | {% set sort_keys = def['keys']['sort_keys'] %} 67 | {% set sort_keys_s = sort_keys | join('", "') %} 68 | {% if sort_keys %} {{ sort_style }} sortkey("{{ sort_keys_s }}") {% endif %} 69 | {% endif %} 70 | ; 71 | 72 | -- TABLE COMMENT 73 | {% if def['description'] %} 74 | comment on table "{{ def['schema'] }}"."{{ def['name'] }}" is '{{ def["description"] }}'; 75 | {% endif %} 76 | 77 | -- COLUMN COMMENTS 78 | {% for column in def['columns'].values() -%} 79 | {%- if column['description'] -%} 80 | comment on column "{{ def['schema'] }}"."{{ def['name'] }}"."{{ column['name'] }}" is '{{ column["description"] }}'; 81 | 82 | {% endif -%} 83 | {% endfor %} 84 | 85 | {% endmacro %} 86 | -------------------------------------------------------------------------------- /macros/introspection.sql: -------------------------------------------------------------------------------- 1 | 2 | {% macro get_data(query, columns) %} 3 | 4 | {%- call statement('_', fetch_result=True) %} 5 | {{ query }} 6 | {% endcall %} 7 | 8 | {%- set records = load_result('_') -%} 9 | 10 | {% if not records %} 11 | {{ return([]) }} 12 | {% endif %} 13 | 14 | {%- set ret = [] -%} 15 | {% for record in records['data'] %} 16 | {% set processing = {} %} 17 | {% for column in columns %} 18 | {% set _ = processing.update({column: record[loop.index0]}) %} 19 | {% endfor %} 20 | {% set _ = ret.append(processing) %} 21 | {% endfor %} 22 | 23 | {{ return(ret) }} 24 | 25 | {% endmacro %} 26 | 27 | 28 | {% macro fetch_table_data(schema_name, table_name) %} 29 | 30 | {% set sql %} 31 | select 32 | schemaname, 33 | tablename, 34 | description, 35 | relation_type 36 | from ({{ redshift.fetch_table_data_sql() }}) 37 | where schemaname = '{{ schema_name }}' 38 | and tablename = '{{ table_name }}' 39 | {% endset %} 40 | 41 | {% set table = redshift.get_data(sql, ['schema', 'name', 'description', 'type']) %} 42 | {{ return(table) }} 43 | 44 | {% endmacro %} 45 | 46 | {% macro fetch_column_data(schema_name, table_name) %} 47 | 48 | {% set sql %} 49 | select 50 | col_index, 51 | col_name, 52 | description, 53 | col_datatype, 54 | col_encoding, 55 | col_default, 56 | col_not_null 57 | from ({{ redshift.fetch_column_data_sql() }}) 58 | where schemaname = '{{ schema_name }}' 59 | and tablename = '{{ table_name }}' 60 | {% endset %} 61 | 62 | {% set columns = redshift.get_data(sql, ['position', 'name', 
'description', 'type', 'encoding', 'default', 'not_null']) %} 63 | 64 | {% set ret = {} %} 65 | {% for column in columns %} 66 | {%- set _ = ret.update({column.name: column}) -%} 67 | {% endfor %} 68 | 69 | {{ return(ret) }} 70 | 71 | {% endmacro %} 72 | 73 | {% macro fetch_sort_dist_key_data(schema_name, table_name) %} 74 | 75 | {% set sql %} 76 | select 77 | sort_style, 78 | sort_keys, 79 | diststyle, 80 | dist_key 81 | from ({{ redshift.fetch_sort_dist_key_data_sql() }}) 82 | where schemaname = '{{ schema_name }}' 83 | and tablename = '{{ table_name }}' 84 | {% endset %} 85 | 86 | {% set keys = redshift.get_data(sql, ['sort_style', 'sort_keys', 'dist_style', 'dist_key']) %} 87 | {% for key in keys %} 88 | {% set _ = key.update({'sort_keys': key['sort_keys'].split('|')}) %} 89 | {% endfor %} 90 | 91 | {{ return(keys) }} 92 | 93 | {% endmacro %} 94 | 95 | {% macro fetch_constraints(schema_name, table_name) %} 96 | 97 | {% set sql %} 98 | select 99 | constraint_type, 100 | col_constraint 101 | from ({{ redshift.fetch_constraint_data_sql() }}) 102 | where schemaname = '{{ schema_name }}' 103 | and tablename = '{{ table_name }}' 104 | {% endset %} 105 | 106 | {% set constraints = redshift.get_data(sql, ['constraint_type', 'col_constraint']) %} 107 | {{ return(constraints) }} 108 | 109 | {% endmacro %} 110 | 111 | 112 | {% macro fetch_table_definition(schema_name, table_name) %} 113 | {% set tables = redshift.fetch_table_data(schema_name, table_name) %} 114 | 115 | {% if (tables | length) == 0 %} 116 | {{ return(none) }} 117 | {% elif (tables | length) > 1 %} 118 | {{ log(tables) }} 119 | {{ exceptions.raise_compiler_error("Expected one table") }} 120 | {% endif %} 121 | 122 | {% set table = tables[0] %} 123 | {% set columns = redshift.fetch_column_data(schema_name, table_name) %} 124 | {% set keys = redshift.fetch_sort_dist_key_data(schema_name, table_name) | first %} 125 | {% set constraints = redshift.fetch_constraints(schema_name, table_name) %} 126 | 127 | {% set _ = table.update({"columns": columns}) %} 128 | {% set _ = table.update({"keys": keys}) %} 129 | {% set _ = table.update({"constraints": constraints}) %} 130 | 131 | {{ return(table) }} 132 | 133 | {% endmacro %} 134 | -------------------------------------------------------------------------------- /macros/queries.sql: -------------------------------------------------------------------------------- 1 | 2 | {# 3 | -- These macros should be models, but limitations in the 0.9.0 implementation 4 | -- of `ref` make this infeasible. 
TODO - move this logic directly into models 5 | #} 6 | 7 | {% macro fetch_table_data_sql() %} 8 | 9 | select 10 | n.nspname AS schemaname, 11 | c.relname AS tablename, 12 | d.description, 13 | case 14 | when c.relkind = 'v' then 'view' 15 | when c.relkind = 'r' then 'table' 16 | else null 17 | end as relation_type 18 | 19 | from pg_catalog.pg_namespace n 20 | join pg_catalog.pg_class c on n.oid = c.relnamespace 21 | left outer join pg_catalog.pg_description d ON (d.objoid = c.oid AND d.objsubid = 0) 22 | 23 | where schemaname not like 'pg_%' 24 | and schemaname != 'information_schema' 25 | and relkind in ('v', 'r') 26 | 27 | {% endmacro %} 28 | 29 | {% macro fetch_column_data_sql() %} 30 | 31 | with columns as ( 32 | select 33 | n.nspname as schemaname, 34 | c.relname as tablename, 35 | a.attnum as col_index, 36 | a.attname as col_name, 37 | d.description, 38 | 39 | case 40 | when strpos(upper(format_type(a.atttypid, a.atttypmod)), 'CHARACTER VARYING') > 0 41 | then replace(upper(format_type(a.atttypid, a.atttypmod)), 'CHARACTER VARYING', 'VARCHAR') 42 | 43 | when strpos(upper(format_type(a.atttypid, a.atttypmod)), 'CHARACTER') > 0 44 | then replace(upper(format_type(a.atttypid, a.atttypmod)), 'CHARACTER', 'CHAR') 45 | 46 | else upper(format_type(a.atttypid, a.atttypmod)) 47 | end as col_datatype, 48 | 49 | case 50 | when format_encoding((a.attencodingtype)::integer) = 'none' then 'raw' 51 | else format_encoding((a.attencodingtype)::integer) 52 | end as col_encoding, 53 | 54 | case 55 | when a.atthasdef is true then adef.adsrc 56 | else null 57 | end as col_default, 58 | 59 | a.attnotnull as col_not_null 60 | 61 | from pg_catalog.pg_namespace as n 62 | inner join pg_catalog.pg_class as c on n.oid = c.relnamespace 63 | inner join pg_catalog.pg_attribute as a on c.oid = a.attrelid 64 | left outer join pg_catalog.pg_description as d ON (d.objoid = a.attrelid AND d.objsubid = a.attnum) 65 | left outer join pg_catalog.pg_attrdef as adef on a.attrelid = adef.adrelid and a.attnum = adef.adnum 66 | where c.relkind = 'r' 67 | and a.attnum > 0 68 | 69 | ) 70 | select * 71 | from columns 72 | 73 | {% endmacro %} 74 | 75 | {% macro fetch_constraint_data_sql() %} 76 | 77 | select 78 | c.nspname as schemaname, 79 | b.relname as tablename, 80 | case 81 | when a.contype = 'p' then 'primary key' 82 | when a.contype = 'u' then 'unique' 83 | when a.contype = 'f' then 'foreign key' 84 | else null 85 | end as constraint_type, 86 | pg_get_constraintdef(a.oid) as col_constraint 87 | 88 | from pg_catalog.pg_constraint a 89 | join pg_catalog.pg_class b on(a.conrelid=b.oid) 90 | join pg_catalog.pg_namespace c on(a.connamespace=c.oid) 91 | where a.contype in ('p', 'u', 'f') 92 | 93 | {% endmacro %} 94 | 95 | 96 | 97 | {% macro fetch_sort_dist_key_data_sql() %} 98 | 99 | with dist_config as ( 100 | 101 | -- gets diststyle and distkey (if there is one) 102 | select distinct 103 | trim(n.nspname) as schemaname, 104 | trim(c.relname) as tablename, 105 | 106 | case 107 | when c.reldiststyle = 0 then 'even' 108 | when c.reldiststyle = 1 then 'key' 109 | when c.reldiststyle = 8 then 'all' 110 | else null 111 | end as diststyle, 112 | 113 | max(case when c.reldiststyle = 1 and a.attisdistkey IS TRUE and a.attnum > 0 then a.attname else null end) over (partition by n.nspname, c.relname) as dist_key 114 | 115 | from pg_catalog.pg_namespace as n 116 | inner join pg_catalog.pg_class as c on n.oid = c.relnamespace 117 | inner join pg_catalog.pg_attribute as a on c.oid = a.attrelid 118 | where c.relkind = 'r' 119 | 120 | ), 121 | 
122 | sort_config as ( 123 | 124 | -- get sortstyle and sortkeys 125 | select distinct 126 | trim(n.nspname) as schemaname, 127 | trim(c.relname) as tablename, 128 | case 129 | when min(a.attsortkeyord) over (partition by n.nspname, c.relname) = -1 then 'interleaved' 130 | else 'compound' 131 | end as sort_style, 132 | listagg(a.attname, '|') within group (order by a.attsortkeyord) over (partition by n.nspname, c.relname) as sort_keys 133 | 134 | from pg_catalog.pg_namespace as n 135 | inner join pg_catalog.pg_class as c on n.oid = c.relnamespace 136 | inner join pg_catalog.pg_attribute as a on c.oid = a.attrelid 137 | where c.relkind = 'r' 138 | and abs(a.attsortkeyord) > 0 139 | and a.attnum > 0 140 | ) 141 | 142 | select * 143 | from sort_config 144 | join dist_config using (schemaname, tablename) 145 | 146 | {% endmacro %} 147 | -------------------------------------------------------------------------------- /macros/redshift_maintenance_operation.sql: -------------------------------------------------------------------------------- 1 | {% macro vacuumable_tables_sql() %} 2 | {#- 3 | Pull the arguments out of the kwargs dictionary. This allows folks to define 4 | whatever arguments they want, e.g. a variable limit 5 | -#} 6 | {%- set exclude_schemas=kwargs.get('exclude_schemas', []) -%} 7 | {%- set exclude_schemas_like=kwargs.get('exclude_schemas_like', []) -%} 8 | 9 | select 10 | current_database() as table_database, 11 | table_schema, 12 | table_name 13 | from information_schema.tables 14 | where table_type = 'BASE TABLE' 15 | and table_schema not in ('information_schema', 'pg_catalog') 16 | {% if exclude_schemas %} 17 | and table_schema not in ('{{exclude_schemas | join("', '")}}') 18 | {% endif %} 19 | {% for exclude_schema_like in exclude_schemas_like %} 20 | and table_schema not like '{{ exclude_schema_like }}' 21 | {% endfor %} 22 | order by table_schema, table_name 23 | {% endmacro %} 24 | 25 | {% macro redshift_maintenance() %} 26 | {#- 27 | This logic means that if you add your own macro named `vacuumable_tables_sql` 28 | to your project, that will be used, giving you the flexibility of defining 29 | your own query. Passing it the `kwargs` variable means you can define your 30 | own keyword arguments. 
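Example invocation, as shown in the README (any values passed via --args are forwarded to vacuumable_tables_sql as kwargs):
    dbt run-operation redshift_maintenance --args '{exclude_schemas: ["looker_scratch"]}'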
31 | -#} 32 | {% set root_project = context.project_name %} 33 | {% if context.get(root_project, {}).get('vacuumable_tables_sql') %} 34 | {% set vacuumable_tables_sql=context[root_project].vacuumable_tables_sql(**kwargs) %} 35 | {% else %} 36 | {% set vacuumable_tables_sql=redshift.vacuumable_tables_sql(**kwargs) %} 37 | {% endif %} 38 | 39 | {% set vacuumable_tables=run_query(vacuumable_tables_sql) %} 40 | 41 | {% for row in vacuumable_tables %} 42 | {% set message_prefix=loop.index ~ " of " ~ loop.length %} 43 | 44 | {%- set relation_to_vacuum = adapter.get_relation( 45 | database=row['table_database'], 46 | schema=row['table_schema'], 47 | identifier=row['table_name'] 48 | ) -%} 49 | {% do run_query("commit") %} 50 | 51 | {% if relation_to_vacuum %} 52 | {% set start=modules.datetime.datetime.now() %} 53 | {{ dbt_utils.log_info(message_prefix ~ " Vacuuming " ~ relation_to_vacuum) }} 54 | {% do run_query("vacuum " ~ relation_to_vacuum) %} 55 | {{ dbt_utils.log_info(message_prefix ~ " Analyzing " ~ relation_to_vacuum) }} 56 | {% do run_query("analyze " ~ relation_to_vacuum) %} 57 | {% set end=modules.datetime.datetime.now() %} 58 | {% set total_seconds = (end - start).total_seconds() | round(2) %} 59 | {{ dbt_utils.log_info(message_prefix ~ " Finished " ~ relation_to_vacuum ~ " in " ~ total_seconds ~ "s") }} 60 | {% else %} 61 | {{ dbt_utils.log_info(message_prefix ~ ' Skipping relation "' ~ row.values() | join ('"."') ~ '" as it does not exist') }} 62 | {% endif %} 63 | 64 | {% endfor %} 65 | 66 | {% endmacro %} 67 | -------------------------------------------------------------------------------- /macros/try_cast.sql: -------------------------------------------------------------------------------- 1 | {%- macro try_cast(str, datatype) -%} 2 | 3 | {%- if datatype == 'bigint' or datatype == 'int' -%} 4 | 5 | case 6 | when trim({{str}}) ~ '^[0-9]+$' then trim({{str}}) 7 | else null 8 | end::{{datatype}} 9 | 10 | {% else %} 11 | 12 | {{ exceptions.raise_compiler_error( 13 | "non-integer datatypes are not currently supported") }} 14 | 15 | {% endif %} 16 | 17 | {%- endmacro -%} 18 | -------------------------------------------------------------------------------- /macros/unload.sql: -------------------------------------------------------------------------------- 1 | -- Redshift UNLOAD grammar (see: http://docs.aws.amazon.com/redshift/latest/dg/r_UNLOAD.html) 2 | {# 3 | UNLOAD ('select-statement') 4 | TO 's3://object-path/name-prefix' 5 | authorization 6 | [ option [ ... ] ] 7 | 8 | where option is 9 | { [ FORMAT [ AS ] ] CSV | PARQUET 10 | | PARTITION BY ( column_name [, ... 
] ) [ INCLUDE ] 11 | | MANIFEST [ VERBOSE ] 12 | | HEADER 13 | | DELIMITER [ AS ] 'delimiter-char' 14 | | FIXEDWIDTH [ AS ] 'fixedwidth-spec' 15 | | ENCRYPTED [ AUTO ] 16 | | BZIP2 17 | | GZIP 18 | | ZSTD 19 | | ADDQUOTES 20 | | NULL [ AS ] 'null-string' 21 | | ESCAPE 22 | | ALLOWOVERWRITE 23 | | CLEANPATH 24 | | PARALLEL [ { ON | TRUE } | { OFF | FALSE } ] 25 | | MAXFILESIZE [AS] max-size [ MB | GB ] 26 | | REGION [AS] 'aws-region' } 27 | | EXTENSION 'extension-name' 28 | 29 | #} 30 | -- Unloads a Redshift table to S3 31 | {% macro unload_table(schema, 32 | table, 33 | s3_path, 34 | iam_role=None, 35 | aws_key=None, 36 | aws_secret=None, 37 | aws_region=None, 38 | aws_token=None, 39 | manifest=False, 40 | header=False, 41 | format=None, 42 | delimiter=",", 43 | null_as="", 44 | max_file_size='6 GB', 45 | escape=True, 46 | compression=None, 47 | add_quotes=False, 48 | encrypted=False, 49 | overwrite=False, 50 | cleanpath=False, 51 | parallel=False, 52 | partition_by=None, 53 | extension=None 54 | ) %} 55 | 56 | -- compile UNLOAD statement 57 | UNLOAD ('SELECT * FROM "{{ schema }}"."{{ table }}"') 58 | TO '{{ s3_path }}' 59 | {% if iam_role %} 60 | IAM_ROLE '{{ iam_role }}' 61 | {% elif aws_key and aws_secret %} 62 | ACCESS_KEY_ID '{{ aws_key }}' 63 | SECRET_ACCESS_KEY '{{ aws_secret }}' 64 | {% if aws_token %} 65 | SESSION_TOKEN '{{ aws_token }}' 66 | {% endif %} 67 | {% else %} 68 | -- Raise an error if authorization args are not present 69 | {{ exceptions.raise_compiler_error("You must provide AWS authorization parameters via 'iam_role' or 'aws_key' and 'aws_secret'.") }} 70 | {% endif %} 71 | {% if manifest %} 72 | MANIFEST VERBOSE 73 | {% endif %} 74 | {% if header %} 75 | HEADER 76 | {% endif %} 77 | {% if format %} 78 | FORMAT AS {{format|upper}} 79 | {% endif %} 80 | {% if not format %} 81 | DELIMITER AS '{{ delimiter }}' 82 | {% endif %} 83 | NULL AS '{{ null_as }}' 84 | MAXFILESIZE AS {{ max_file_size }} 85 | {% if escape %} 86 | ESCAPE 87 | {% endif %} 88 | {% if compression %} 89 | {{ compression|upper }} 90 | {% endif %} 91 | {% if add_quotes %} 92 | ADDQUOTES 93 | {% endif %} 94 | {% if encrypted %} 95 | ENCRYPTED 96 | {% endif %} 97 | {% if overwrite %} 98 | ALLOWOVERWRITE 99 | {% endif %} 100 | {% if cleanpath %} 101 | CLEANPATH 102 | {% endif %} 103 | {% if not parallel %} 104 | PARALLEL OFF 105 | {% endif %} 106 | {% if aws_region %} 107 | REGION '{{ aws_region }}' 108 | {% endif %} 109 | {% if partition_by %} 110 | PARTITION BY ( {{ partition_by | join(', ') }} ) 111 | {% endif %} 112 | {% if extension %} 113 | EXTENSION '{{ extension }}' 114 | {% endif %} 115 | {% endmacro %} 116 | -------------------------------------------------------------------------------- /macros/utilities.sql: -------------------------------------------------------------------------------- 1 | -- decodes pg_class.reldiststyle into 'even', 'all', or the distkey 2 | {% macro decode_reldiststyle(diststyle_field, distkey_field) -%} 3 | decode({{diststyle_field}}, 0, 'even', 4 | 1, {{distkey_field}}, 5 | 'all') 6 | {%- endmacro %} 7 | 8 | 9 | -- take percentage (including type casting) 10 | {% macro percentage(num, denom) -%} 11 | (case {{denom}} 12 | when 0 then 0 13 | else ({{num}}::float / {{denom}}::float) * 100.0 end) 14 | {%- endmacro %} 15 | -------------------------------------------------------------------------------- /models/base/pg_attribute.sql: -------------------------------------------------------------------------------- 1 | select 2 | attrelid 3 | , attname 4 | , atttypid 5 | , 
attstattarget 6 | , attlen 7 | , attnum 8 | , attndims 9 | , attcacheoff 10 | , atttypmod 11 | , attbyval 12 | , attstorage 13 | , attalign 14 | , attnotnull 15 | , atthasdef 16 | , attisdropped 17 | , attislocal 18 | , attinhcount 19 | , attisdistkey 20 | , attispreloaded 21 | , attsortkeyord 22 | , attencodingtype 23 | , attencrypttype 24 | , (case attisdistkey 25 | when 't' then attname 26 | else null end) as dist_key 27 | , (case attsortkeyord 28 | when 1 then attname 29 | else null end) as sort_key 30 | from pg_catalog.pg_attribute 31 | -------------------------------------------------------------------------------- /models/base/pg_class.sql: -------------------------------------------------------------------------------- 1 | select 2 | oid 3 | , relname 4 | , relnamespace 5 | , reltype 6 | , relowner 7 | , relam 8 | , relfilenode 9 | , reltablespace 10 | , relpages 11 | , reltuples 12 | , reltoastrelid 13 | , reltoastidxid 14 | , relhasindex 15 | , relisshared 16 | , relkind 17 | , relnatts 18 | , relexternid 19 | , relisreplicated 20 | , relispinned 21 | , reldiststyle 22 | , relprojbaseid 23 | , relchecks 24 | , reltriggers 25 | , relukeys 26 | , relfkeys 27 | , relrefs 28 | , relhasoids 29 | , relhaspkey 30 | , relhasrules 31 | , relhassubclass 32 | , relacl 33 | from pg_catalog.pg_class 34 | -------------------------------------------------------------------------------- /models/base/pg_depend.sql: -------------------------------------------------------------------------------- 1 | select 2 | classid 3 | , objid 4 | , objsubid 5 | , refclassid 6 | , refobjid 7 | , refobjsubid 8 | , deptype 9 | from pg_catalog.pg_depend 10 | -------------------------------------------------------------------------------- /models/base/pg_namespace.sql: -------------------------------------------------------------------------------- 1 | select 2 | oid 3 | , nspname 4 | , nspowner 5 | , nspacl 6 | from pg_catalog.pg_namespace 7 | -------------------------------------------------------------------------------- /models/base/pg_tables.sql: -------------------------------------------------------------------------------- 1 | select 2 | schemaname as schema_name 3 | , tablename as table_name 4 | , tableowner as table_owner 5 | , tablespace as table_space 6 | , hasindexes as has_indexes 7 | , hasrules as has_rules 8 | , hastriggers as has_triggers 9 | from pg_catalog.pg_tables 10 | -------------------------------------------------------------------------------- /models/base/pg_user.sql: -------------------------------------------------------------------------------- 1 | select 2 | 3 | usesysid as user_id 4 | , usename as username 5 | 6 | from pg_catalog.pg_user 7 | -------------------------------------------------------------------------------- /models/base/pg_views.sql: -------------------------------------------------------------------------------- 1 | select 2 | schemaname as schema_name 3 | , viewname as view_name 4 | , viewowner as view_owner 5 | from pg_catalog.pg_views 6 | -------------------------------------------------------------------------------- /models/base/redshift_cost.sql: -------------------------------------------------------------------------------- 1 | with stl_explain as ( 2 | 3 | select query_id, plannode from {{ref('stl_explain')}} 4 | where nodeid = 1 5 | 6 | ), parse_step_one as ( 7 | 8 | -- plannode (which contains cost) is formatted like: 9 | -- XN Seq Scan on nyc_last_update (cost=0.00..0.03 rows=2 width=40) 10 | -- we want to rip out the cost part (0.00, 0.03) and make it 
usable. 11 | -- cost_string after this step is "0.00..0.03 ..." 12 | select 13 | query_id 14 | , split_part(plannode, 'cost=', 2) as cost_string 15 | 16 | from stl_explain 17 | 18 | ), parse_step_two as ( 19 | 20 | select 21 | query_id 22 | , split_part(cost_string, '..', 1) as starting_cost 23 | , substring( 24 | split_part(cost_string, '..', 2) 25 | from 1 26 | for strpos(split_part(cost_string, '..', 2), ' ')) as total_cost 27 | 28 | from parse_step_one 29 | 30 | ) 31 | 32 | 33 | select 34 | 35 | query_id 36 | , starting_cost::float as starting_cost 37 | , total_cost::float as total_cost 38 | 39 | from parse_step_two 40 | -------------------------------------------------------------------------------- /models/base/stl_explain.sql: -------------------------------------------------------------------------------- 1 | select 2 | 3 | userid as user_id 4 | , query as query_id 5 | , nodeid 6 | , parentid 7 | , plannode 8 | , info 9 | 10 | from pg_catalog.stl_explain 11 | -------------------------------------------------------------------------------- /models/base/stl_query.sql: -------------------------------------------------------------------------------- 1 | select 2 | 3 | userid as user_id 4 | , query as query_id 5 | , xid as transaction_id 6 | , label 7 | , pid 8 | , database 9 | , starttime as started_at 10 | , endtime as finished_at 11 | , aborted 12 | 13 | from pg_catalog.stl_query 14 | -------------------------------------------------------------------------------- /models/base/stl_wlm_query.sql: -------------------------------------------------------------------------------- 1 | select 2 | 3 | userid as user_id 4 | , query as query_id 5 | , xid 6 | , task 7 | , service_class 8 | , slot_count 9 | , service_class_start_time 10 | , queue_start_time 11 | , queue_end_time 12 | , total_queue_time 13 | , exec_start_time 14 | , exec_end_time 15 | , total_exec_time 16 | , service_class_end_time 17 | , final_state 18 | 19 | from pg_catalog.stl_wlm_query 20 | -------------------------------------------------------------------------------- /models/base/stv_blocklist.sql: -------------------------------------------------------------------------------- 1 | select 2 | slice 3 | , col 4 | , tbl 5 | , blocknum 6 | , num_values 7 | , extended_limits 8 | , minvalue 9 | , maxvalue 10 | , sb_pos 11 | , pinned 12 | , on_disk 13 | , backed_up 14 | , modified 15 | , hdr_modified 16 | , unsorted 17 | , tombstone 18 | , preferred_diskno 19 | , temporary 20 | , newblock 21 | , num_readers 22 | , id 23 | , flags 24 | from pg_catalog.stv_blocklist 25 | -------------------------------------------------------------------------------- /models/base/stv_partitions.sql: -------------------------------------------------------------------------------- 1 | select 2 | 3 | owner 4 | , host 5 | , diskno 6 | , part_begin 7 | , part_end 8 | , used 9 | , tossed 10 | , capacity 11 | , "reads" 12 | , writes 13 | , seek_forward 14 | , seek_back 15 | , is_san 16 | , failed 17 | , mbps 18 | , mount 19 | 20 | from pg_catalog.stv_partitions 21 | -------------------------------------------------------------------------------- /models/base/stv_tbl_perm.sql: -------------------------------------------------------------------------------- 1 | select 2 | slice 3 | , id -- table id 4 | , name -- table name 5 | , rows 6 | , sorted_rows 7 | , (rows - sorted_rows) as unsorted_rows 8 | , temp 9 | , db_id 10 | , backup 11 | from pg_catalog.stv_tbl_perm 12 | -------------------------------------------------------------------------------- 
/models/base/svv_diskusage.sql: -------------------------------------------------------------------------------- 1 | select 2 | db_id 3 | , name 4 | , slice 5 | , col 6 | , tbl 7 | , blocknum 8 | , num_values 9 | , extended_limits 10 | , minvalue 11 | , maxvalue 12 | , sb_pos 13 | , pinned 14 | , on_disk 15 | , backed_up 16 | , modified 17 | , hdr_modified 18 | , unsorted 19 | , tombstone 20 | , preferred_diskno 21 | , temporary 22 | , newblock 23 | , num_readers 24 | , id 25 | , flags 26 | from pg_catalog.svv_diskusage 27 | -------------------------------------------------------------------------------- /models/introspection/redshift_columns.sql: -------------------------------------------------------------------------------- 1 | 2 | {{ redshift.fetch_column_data_sql() }} 3 | -------------------------------------------------------------------------------- /models/introspection/redshift_constraints.sql: -------------------------------------------------------------------------------- 1 | 2 | {{ redshift.fetch_constraint_data_sql() }} 3 | -------------------------------------------------------------------------------- /models/introspection/redshift_sort_dist_keys.sql: -------------------------------------------------------------------------------- 1 | 2 | {{ redshift.fetch_sort_dist_key_data_sql() }} 3 | -------------------------------------------------------------------------------- /models/introspection/redshift_tables.sql: -------------------------------------------------------------------------------- 1 | 2 | {{ redshift.fetch_table_data_sql() }} 3 | -------------------------------------------------------------------------------- /models/views/redshift_admin_dependencies.sql: -------------------------------------------------------------------------------- 1 | {# SOURCE: https://github.com/awslabs/amazon-redshift-utils/blob/master/src/AdminViews/v_view_dependency.sql #} 2 | 3 | select distinct 4 | srcobj.oid as source_oid 5 | , srcnsp.nspname as source_schemaname 6 | , srcobj.relname as source_objectname 7 | , tgtobj.oid as dependent_oid 8 | , tgtnsp.nspname as dependent_schemaname 9 | , tgtobj.relname as dependent_objectname 10 | 11 | from 12 | 13 | {{ ref('pg_class') }} as srcobj 14 | join {{ ref('pg_depend') }} as srcdep on srcobj.oid = srcdep.refobjid 15 | join {{ ref('pg_depend') }} as tgtdep on srcdep.objid = tgtdep.objid 16 | join {{ ref('pg_class') }} as tgtobj 17 | on tgtdep.refobjid = tgtobj.oid 18 | and srcobj.oid <> tgtobj.oid 19 | left join {{ ref('pg_namespace') }} as srcnsp 20 | on srcobj.relnamespace = srcnsp.oid 21 | left join {{ ref('pg_namespace') }} tgtnsp on tgtobj.relnamespace = tgtnsp.oid 22 | 23 | where 24 | tgtdep.deptype = 'i' --dependency_internal 25 | and tgtobj.relkind = 'v' --i=index, v=view, s=sequence 26 | -------------------------------------------------------------------------------- /models/views/redshift_admin_queries.sql: -------------------------------------------------------------------------------- 1 | with queries as ( 2 | 3 | select * from {{ref('stl_query')}} 4 | 5 | ), users as ( 6 | 7 | select * from {{ref('pg_user')}} 8 | 9 | ), cost as ( 10 | 11 | select * from {{ref('redshift_cost')}} 12 | 13 | ), timings as ( 14 | 15 | select * from {{ref('stl_wlm_query')}} 16 | 17 | ) 18 | 19 | 20 | 21 | select 22 | 23 | queries.query_id 24 | , queries.transaction_id 25 | , users.username::varchar 26 | 27 | , cost.starting_cost 28 | , cost.total_cost 29 | 30 | , queries.started_at 31 | , queries.finished_at 32 | 33 | , timings.queue_start_time 34 | , 
timings.queue_end_time 35 | , (timings.total_queue_time::float / 1000000.0) as total_queue_time_seconds 36 | 37 | , timings.exec_start_time 38 | , timings.exec_end_time 39 | , (timings.total_exec_time::float / 1000000.0) as total_exec_time_seconds 40 | 41 | from queries 42 | 43 | left join users 44 | on queries.user_id = users.user_id 45 | 46 | left join cost 47 | on queries.query_id = cost.query_id 48 | 49 | left join timings 50 | on queries.query_id = timings.query_id 51 | -------------------------------------------------------------------------------- /models/views/redshift_admin_table_stats.sql: -------------------------------------------------------------------------------- 1 | with unsorted_by_table as ( 2 | 3 | select 4 | db_id 5 | , id as table_id 6 | , name as table_name 7 | , sum(rows) as rows 8 | , sum(unsorted_rows) as unsorted_rows 9 | from {{ref('stv_tbl_perm')}} 10 | group by 1, 2, 3 11 | 12 | ), pg_class as ( 13 | 14 | select * from {{ref('pg_class')}} 15 | 16 | ), pg_namespace as ( 17 | 18 | select * from {{ref('pg_namespace')}} 19 | 20 | ), table_sizes as ( 21 | 22 | select 23 | tbl as table_id 24 | , count(*) as size_in_megabytes 25 | from {{ref('stv_blocklist')}} 26 | group by 1 27 | 28 | ), table_attributes as ( 29 | 30 | select 31 | attrelid as table_id 32 | , min(dist_key) as dist_key 33 | , min(sort_key) as sort_key 34 | , max(attsortkeyord) as num_sort_keys 35 | , (max(attencodingtype) > 0) as is_encoded 36 | , max(attnum) as num_columns 37 | from {{ref('pg_attribute')}} 38 | group by 1 39 | 40 | ), slice_distribution as ( 41 | 42 | select 43 | tbl as table_id 44 | , trim(name) as name 45 | , slice 46 | , count(*) as size_in_megabytes 47 | 48 | from {{ref('svv_diskusage')}} 49 | group by 1, 2, 3 50 | 51 | ), capacity as ( 52 | 53 | select 54 | sum(capacity) as total_megabytes 55 | from {{ref('stv_partitions')}} 56 | where part_begin=0 57 | 58 | ), table_distribution_ratio as ( 59 | 60 | select 61 | table_id 62 | , (max(size_in_megabytes)::float / min(size_in_megabytes)::float) 63 | as ratio 64 | from slice_distribution 65 | group by 1 66 | 67 | ) 68 | 69 | select 70 | 71 | trim(pg_namespace.nspname) as schema 72 | , trim(unsorted_by_table.table_name) as table 73 | , unsorted_by_table.rows 74 | , unsorted_by_table.unsorted_rows 75 | , {{percentage('unsorted_by_table.unsorted_rows', 76 | 'unsorted_by_table.rows')}} 77 | as percent_rows_unsorted 78 | , unsorted_by_table.table_id 79 | 80 | , {{decode_reldiststyle('pg_class.reldiststyle', 81 | 'table_attributes.dist_key')}} as dist_style 82 | , table_distribution_ratio.ratio as dist_skew 83 | 84 | , (table_attributes.sort_key is not null) as is_sorted 85 | , table_attributes.sort_key 86 | , table_attributes.num_sort_keys 87 | , table_attributes.num_columns 88 | 89 | , table_sizes.size_in_megabytes 90 | , {{percentage('table_sizes.size_in_megabytes', 91 | 'capacity.total_megabytes')}} 92 | as disk_used_percent_of_total 93 | , table_attributes.is_encoded 94 | 95 | from unsorted_by_table 96 | 97 | left join pg_class 98 | on pg_class.oid = unsorted_by_table.table_id 99 | 100 | left join pg_namespace 101 | on pg_namespace.oid = pg_class.relnamespace 102 | 103 | left join capacity 104 | on 1=1 105 | 106 | left join table_sizes 107 | on unsorted_by_table.table_id = table_sizes.table_id 108 | 109 | left join table_attributes 110 | on table_attributes.table_id = unsorted_by_table.table_id 111 | 112 | left join table_distribution_ratio 113 | on table_distribution_ratio.table_id = unsorted_by_table.table_id 114 | 
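One subtlety in `redshift_admin_table_stats.sql` above is the `dist_skew` column: the `table_distribution_ratio` CTE divides each table's largest per-slice footprint by its smallest, so a ratio of 1.0 means rows are spread evenly across slices, while anything much larger suggests the distribution key is piling data onto a few slices. A toy sketch of that calculation, with hard-coded slice sizes standing in for `svv_diskusage` (all ids and sizes are invented):

```sql
-- Toy version of the table_distribution_ratio CTE, using literal
-- slice sizes instead of svv_diskusage.
with slice_distribution as (

    select 101 as table_id, 0 as slice, 10 as size_in_megabytes
    union all
    select 101, 1, 10
    union all
    select 102, 0, 30
    union all
    select 102, 1, 10

)

select
    table_id
  , (max(size_in_megabytes)::float / min(size_in_megabytes)::float) as ratio
from slice_distribution
group by 1
-- table 101 -> ratio 1.0 (even distribution across both slices)
-- table 102 -> ratio 3.0 (one slice holds three times the data of the other)
```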
-------------------------------------------------------------------------------- /models/views/redshift_admin_users_schema_privileges.sql: -------------------------------------------------------------------------------- 1 | with tables as ( 2 | 3 | select * from {{ref('pg_tables')}} 4 | 5 | ), views as ( 6 | 7 | select * from {{ref('pg_views')}} 8 | 9 | ), users as ( 10 | 11 | select * from {{ref('pg_user')}} 12 | 13 | ), schemas as ( 14 | 15 | select 16 | distinct(schema_name) 17 | from tables 18 | where schema_name not in ('pg_catalog', 'information_schema') 19 | 20 | union 21 | 22 | select 23 | distinct(schema_name) 24 | from views 25 | 26 | where schema_name not in ('pg_catalog', 'information_schema') 27 | 28 | ) 29 | 30 | 31 | select 32 | schemas.schema_name 33 | , users.username 34 | , has_schema_privilege(users.username, schemas.schema_name, 'usage') AS has_usage_privilege 35 | , has_schema_privilege(users.username, schemas.schema_name, 'create') AS has_create_privilege 36 | from schemas 37 | cross join users 38 | order by schemas.schema_name, users.username 39 | -------------------------------------------------------------------------------- /models/views/redshift_admin_users_table_view_privileges.sql: -------------------------------------------------------------------------------- 1 | with tables as ( 2 | 3 | select * from {{ref('pg_tables')}} 4 | 5 | ), views as ( 6 | 7 | select * from {{ref('pg_views')}} 8 | 9 | ), users as ( 10 | 11 | select * from {{ref('pg_user')}} 12 | 13 | ), objects as ( 14 | 15 | select 16 | schema_name 17 | , 'table' as object_type 18 | , table_name as object_name 19 | , '"' || schema_name || '"."' || table_name || '"' as full_object_name 20 | from tables 21 | where schema_name not in ('pg_catalog', 'information_schema') 22 | 23 | union 24 | 25 | select 26 | schema_name 27 | , 'view' as object_type 28 | , view_name as object_name 29 | , '"' || schema_name || '"."' || view_name || '"' as full_object_name 30 | from views 31 | where schema_name not in ('pg_catalog', 'information_schema') 32 | 33 | ) 34 | 35 | select 36 | objects.schema_name 37 | , objects.object_name 38 | , users.username 39 | , has_table_privilege(users.username, objects.full_object_name, 'select') as has_select_privilege 40 | , has_table_privilege(users.username, objects.full_object_name, 'insert') as has_insert_privilege 41 | , has_table_privilege(users.username, objects.full_object_name, 'update') as has_update_privilege 42 | , has_table_privilege(users.username, objects.full_object_name, 'delete') as has_delete_privilege 43 | , has_table_privilege(users.username, objects.full_object_name, 'references') as has_references_privilege 44 | from objects 45 | cross join users 46 | order by objects.full_object_name, users.username 47 | -------------------------------------------------------------------------------- /packages.yml: -------------------------------------------------------------------------------- 1 | packages: 2 | - package: dbt-labs/dbt_utils 3 | version: [">=0.8.0", "<2.0.0"] 4 | --------------------------------------------------------------------------------
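A closing usage note on the two privilege views above: because each one cross joins every user against every schema or object, practical queries tend to filter for missing grants rather than scan the whole grid. A minimal sketch, assuming the package's models are built into a schema named `analytics` (substitute whatever schema your dbt target writes to):

```sql
-- List users who lack usage on a schema entirely.
-- 'analytics' is an assumed target schema, not something this package creates.
select
    schema_name
  , username
from analytics.redshift_admin_users_schema_privileges
where not has_usage_privilege
order by schema_name, username
```

The same pattern works against `redshift_admin_users_table_view_privileges`, filtering on whichever of the five `has_*_privilege` flags you care about.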