├── .editorconfig
├── .github
│   └── workflows
│       └── ci.yaml
├── .gitignore
├── LICENSE
├── MANIFEST.in
├── README.md
├── conftest.py
├── hack
│   ├── Dockerfile.hadoop
│   ├── Dockerfile.hive
│   ├── Dockerfile.superset
│   ├── configs
│   │   ├── hive
│   │   │   └── hive-site.xml
│   │   ├── superset
│   │   │   ├── gunicorn.conf.py
│   │   │   └── superset_config.py
│   │   └── trino
│   │       ├── catalog
│   │       │   ├── hive.properties
│   │       │   ├── mysql.properties
│   │       │   └── postgresql.properties
│   │       ├── config.properties
│   │       ├── jvm.config
│   │       ├── log.properties
│   │       └── node.properties
│   ├── docker-compose.yml
│   ├── release.sh
│   ├── samples
│   │   ├── mysql
│   │   │   ├── 1-sakila-schema.sql
│   │   │   └── 2-sakila-data.sql
│   │   └── postgres
│   │       ├── dvdrental.sh
│   │       └── dvdrental.tar
│   └── scripts
│       ├── hive
│       │   ├── hiveserver2-entrypoint.sh
│       │   ├── metastore-entrypoint.sh
│       │   └── wait-for
│       └── superset
│           ├── docker-entrypoint.sh
│           ├── superset-tools
│           └── trino.py
├── requirements.txt
├── setup.py
├── sqlalchemy_trino
│   ├── __init__.py
│   ├── compiler.py
│   ├── datatype.py
│   ├── dbapi.py
│   ├── dialect.py
│   └── error.py
└── tests
    ├── __init__.py
    ├── assertions.py
    ├── data
    │   └── population.csv
    ├── test_compiler.py
    ├── test_datatype_parse.py
    ├── test_datatype_split.py
    └── test_dialect.py

/.editorconfig:
--------------------------------------------------------------------------------
 1 | root = true
 2 | 
 3 | [*]
 4 | charset = utf-8
 5 | end_of_line = lf
 6 | indent_size = 2
 7 | indent_style = space
 8 | insert_final_newline = true
 9 | trim_trailing_whitespace = true
10 | 
11 | [*.py]
12 | indent_size = 4
13 | 
14 | [{*.mk, *.make, Makefile}]
15 | indent_style = tab
16 | 
17 | [*.md]
18 | trim_trailing_whitespace = false
--------------------------------------------------------------------------------
/.github/workflows/ci.yaml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on: [push, pull_request]
 4 | 
 5 | jobs:
 6 |   build:
 7 |     runs-on: ubuntu-latest
 8 |     strategy:
 9 |       matrix:
10 |         python-version:
11 |           - '3.7'
12 |           - '3.8'
13 |           - '3.9'
14 |     steps:
15 |       - uses: actions/checkout@v2
16 |       - uses: actions/setup-python@v2
17 |         with:
18 |           python-version: ${{ matrix.python-version }}
19 |       - name: Upgrade pip version
20 |         run: |
21 |           python -m pip install -U pip
22 |       - run: pip install -r requirements.txt
23 |       - run: pip install pytest assertpy
24 |       - run: pytest
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | ### VirtualEnv template
 2 | # Virtualenv
 3 | # http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
 4 | .Python
 5 | [Bb]in
 6 | [Ii]nclude
 7 | [Ll]ib
 8 | [Ll]ib64
 9 | [Ll]ocal
10 | pyvenv.cfg
11 | .venv
12 | pip-selfcheck.json
13 | 
14 | ### Python template
15 | # Byte-compiled / optimized / DLL files
16 | __pycache__/
17 | *.py[cod]
18 | *$py.class
19 | 
20 | # C extensions
21 | *.so
22 | 
23 | # Distribution / packaging
24 | .Python
25 | env/
26 | build/
27 | develop-eggs/
28 | dist/
29 | downloads/
30 | eggs/
31 | .eggs/
32 | lib/
33 | lib64/
34 | parts/
35 | sdist/
36 | var/
37 | wheels/
38 | *.egg-info/
39 | .installed.cfg
40 | *.egg
41 | 
42 | # PyInstaller
43 | # Usually these files are written by a python script from a template
44 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
45 | *.manifest 46 | *.spec 47 | 48 | # Installer logs 49 | pip-log.txt 50 | pip-delete-this-directory.txt 51 | 52 | # Unit test / coverage reports 53 | htmlcov/ 54 | .tox/ 55 | .coverage 56 | .coverage.* 57 | .cache 58 | nosetests.xml 59 | coverage.xml 60 | *,cover 61 | .hypothesis/ 62 | 63 | # Translations 64 | *.mo 65 | *.pot 66 | 67 | # Django stuff: 68 | *.log 69 | local_settings.py 70 | 71 | # Flask stuff: 72 | instance/ 73 | .webassets-cache 74 | 75 | # Scrapy stuff: 76 | .scrapy 77 | 78 | # Sphinx documentation 79 | docs/_build/ 80 | 81 | # PyBuilder 82 | target/ 83 | 84 | # Jupyter Notebook 85 | .ipynb_checkpoints 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # celery beat schedule file 91 | celerybeat-schedule 92 | 93 | # SageMath parsed files 94 | *.sage.py 95 | 96 | # dotenv 97 | .env 98 | 99 | # virtualenv 100 | .venv 101 | venv/ 102 | ENV/ 103 | 104 | # Spyder project settings 105 | .spyderproject 106 | 107 | # Rope project settings 108 | .ropeproject 109 | 110 | ### JetBrains template 111 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm 112 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 113 | .idea/ 114 | 115 | # User-specific stuff: 116 | #.idea/**/workspace.xml 117 | #.idea/**/tasks.xml 118 | #.idea/dictionaries 119 | 120 | # Sensitive or high-churn files: 121 | #.idea/**/dataSources/ 122 | #.idea/**/dataSources.ids 123 | #.idea/**/dataSources.xml 124 | #.idea/**/dataSources.local.xml 125 | #.idea/**/sqlDataSources.xml 126 | #.idea/**/dynamic.xml 127 | #.idea/**/uiDesigner.xml 128 | 129 | # Gradle: 130 | #.idea/**/gradle.xml 131 | #.idea/**/libraries 132 | 133 | # Mongo Explorer plugin: 134 | #.idea/**/mongoSettings.xml 135 | 136 | ## File-based project format: 137 | *.iws 138 | *.iml 139 | 140 | ## Plugin-specific files: 141 | 142 | # IntelliJ 143 | /out/ 144 | 145 | # mpeltonen/sbt-idea plugin 146 | .idea_modules/ 147 | 148 | # JIRA plugin 149 | atlassian-ide-plugin.xml 150 | 151 | # Crashlytics plugin (for Android Studio and IntelliJ) 152 | com_crashlytics_export_strings.xml 153 | crashlytics.properties 154 | crashlytics-build.properties 155 | fabric.properties 156 | 157 | ### Eclipse template 158 | .metadata 159 | bin/ 160 | tmp/ 161 | *.tmp 162 | *.bak 163 | *.swp 164 | *~.nib 165 | local.properties 166 | .settings/ 167 | .loadpath 168 | .recommenders 169 | 170 | # External tool builders 171 | .externalToolBuilders/ 172 | 173 | # Locally stored "Eclipse launch configurations" 174 | *.launch 175 | 176 | # PyDev specific (Python IDE for Eclipse) 177 | *.pydevproject 178 | 179 | # CDT-specific (C/C++ Development Tooling) 180 | .cproject 181 | 182 | # CDT- autotools 183 | .autotools 184 | 185 | # Java annotation processor (APT) 186 | .factorypath 187 | 188 | # PDT-specific (PHP Development Tools) 189 | .buildpath 190 | 191 | # sbteclipse plugin 192 | .target 193 | 194 | # Tern plugin 195 | .tern-project 196 | 197 | # TeXlipse plugin 198 | .texlipse 199 | 200 | # STS (Spring Tool Suite) 201 | .springBeans 202 | 203 | # Code Recommenders 204 | .recommenders/ 205 | 206 | # Scala IDE specific (Scala & Java development for Eclipse) 207 | .cache-main 208 | .scala_dependencies 209 | .worksheet 210 | 211 | ### SublimeText template 212 | # Cache files for Sublime Text 213 | *.tmlanguage.cache 214 | *.tmPreferences.cache 215 | *.stTheme.cache 216 | 217 | # Workspace files are user-specific 218 | *.sublime-workspace 219 | 220 | # Project files should be checked into the repository, 
unless a significant 221 | # proportion of contributors will probably not be using Sublime Text 222 | # *.sublime-project 223 | 224 | # SFTP configuration file 225 | sftp-config.json 226 | 227 | # Package control specific files 228 | Package Control.last-run 229 | Package Control.ca-list 230 | Package Control.ca-bundle 231 | Package Control.system-ca-bundle 232 | Package Control.cache/ 233 | Package Control.ca-certs/ 234 | Package Control.merged-ca-bundle 235 | Package Control.user-ca-bundle 236 | oscrypto-ca-bundle.crt 237 | bh_unicode_properties.cache 238 | 239 | # Sublime-github package stores a github token in this file 240 | # https://packagecontrol.io/packages/sublime-github 241 | GitHub.sublime-settings 242 | 243 | ### VisualStudioCode template 244 | .vscode/* 245 | #!.vscode/settings.json 246 | #!.vscode/tasks.json 247 | #!.vscode/launch.json 248 | #!.vscode/extensions.json 249 | 250 | ### Ansible template 251 | *.retry 252 | 253 | ### Vagrant template 254 | # General 255 | .vagrant/ 256 | 257 | # Log files (if you are creating logs in debug mode, uncomment this) 258 | *.logs 259 | *.log 260 | 261 | ### Terraform 262 | # Local .terraform directories 263 | **/.terraform/* 264 | 265 | # .tfstate files 266 | *.tfstate 267 | *.tfstate.* 268 | 269 | # Crash log files 270 | crash.log 271 | 272 | # Ignore any .tfvars files that are generated automatically for each Terraform run. Most 273 | # .tfvars files are managed as part of configuration and so should be included in 274 | # version control. 275 | # 276 | # example.tfvars 277 | 278 | # Ignore override files as they are usually used to override resources locally and so 279 | # are not checked in 280 | override.tf 281 | override.tf.json 282 | *_override.tf 283 | *_override.tf.json 284 | 285 | # Include override files you do wish to add to version control using negated pattern 286 | # !example_override.tf 287 | 288 | # Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan 289 | # example: *tfplan* 290 | 291 | ### JupyterNotebooks ### 292 | # gitignore template for Jupyter Notebooks 293 | # website: http://jupyter.org/ 294 | 295 | .ipynb_checkpoints 296 | */.ipynb_checkpoints/* 297 | 298 | # IPython 299 | profile_default/ 300 | ipython_config.py 301 | 302 | # Remove previous ipynb_checkpoints 303 | # git rm -r .ipynb_checkpoints/ 304 | 305 | ### Custom ignore 306 | !.gitkeep 307 | .ci 308 | .pip 309 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 
22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | 
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE
2 | include README.md
3 | # recursive-include sqlalchemy_trino *
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | sqlalchemy-trino
  2 | ================
  3 | 
  4 | ## ⚠️ Deprecation and Archive Notice
  5 | `sqlalchemy-trino` was developed as a _[Trino](https://trino.io/) (f.k.a. PrestoSQL) dialect for SQLAlchemy._
  6 | Since trinodb/trino-python-client#81, all of the `sqlalchemy-trino` code has been donated to and merged into the upstream project.
  7 | This project is therefore no longer active and is considered deprecated.
  8 | 
  9 | 
 10 | ## Supported Trino version
 11 | 
 12 | Trino version 352 and higher
 13 | 
 14 | ## Installation
 15 | The driver can be installed either through PyPI or from the source code.
 16 | ### Through Python Package Index
 17 | ```bash
 18 | pip install sqlalchemy-trino
 19 | ```
 20 | 
 21 | ### Latest from Source Code
 22 | ```bash
 23 | pip install git+https://github.com/dungdm93/sqlalchemy-trino
 24 | ```
 25 | 
 26 | ## Usage
 27 | To connect from SQLAlchemy to Trino, use a connection string (URL) following this pattern:
 28 | ```
 29 | trino://<username>:<password>@<host>:<port>/catalog/[schema]
 30 | ```
 31 | 
 32 | ### JWT authentication
 33 | 
 34 | You can pass the JWT token via either `connect_args` or the query string
 35 | parameter `accessToken`:
 36 | 
 37 | ```Python
 38 | from sqlalchemy.engine import create_engine
 39 | from trino.auth import JWTAuthentication
 40 | 
 41 | # pass access token via connect_args
 42 | engine = create_engine(
 43 |     'trino://<username>@<host>:<port>/',
 44 |     connect_args={'auth': JWTAuthentication('a-jwt-token')},
 45 | )
 46 | 
 47 | # pass access token via the query string param accessToken
 48 | engine = create_engine(
 49 |     'trino://<username>@<host>:<port>/?accessToken=a-jwt-token',
 50 | )
 51 | ```
 52 | 
 53 | **Notice**: When a username and password are provided, the connection to Trino
 54 | is automatically made over TLS.
 55 | 
 56 | ### User impersonation
 57 | 
 58 | User impersonation is supported only with username-and-password based authentication.
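For example, you can verify which identity your queries actually run as by selecting `current_user`. Below is a minimal sketch: the host, port, credentials, catalog, and the user names `alice`/`bob` are placeholder values, and the configuration options are described next:

```python
from sqlalchemy import text
from sqlalchemy.engine import create_engine

# Placeholder coordinates: substitute a real Trino host, credentials and catalog.
engine = create_engine(
    'trino://alice:secret@localhost:8080/hive',
    connect_args={'user': 'bob'},  # hypothetical session user to impersonate
)

with engine.connect() as connection:
    # current_user evaluates to the session user, i.e. the impersonated
    # identity rather than the authenticating principal.
    print(connection.execute(text('SELECT current_user')).scalar())  # -> bob
```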
 59 | 
 60 | You can pass the session user (a.k.a., the user that will be impersonated) via
 61 | either [`connect_args`](https://docs.sqlalchemy.org/en/13/core/engines.html#sqlalchemy.create_engine.params.connect_args)
 62 | or the query string parameter `sessionUser`:
 63 | 
 64 | ```Python
 65 | from sqlalchemy.engine import create_engine
 66 | 
 67 | # pass session user via connect_args
 68 | engine = create_engine(
 69 |     'trino://<username>:<password>@<host>:<port>/',
 70 |     connect_args={'user': 'user-to-be-impersonated'},
 71 | )
 72 | 
 73 | # pass session user via a query string parameter
 74 | engine = create_engine(
 75 |     'trino://<username>:<password>@<host>:<port>/?sessionUser=user-to-be-impersonated',
 76 | )
 77 | ```
 78 | 
 79 | ### Pandas support
 80 | ```python
 81 | import pandas as pd
 82 | from pandas import DataFrame
 83 | import sqlalchemy_trino
 84 | from sqlalchemy.engine import Engine, Connection
 85 | 
 86 | def trino_pandas_write(engine: Engine):
 87 |     df: DataFrame = pd.read_csv("tests/data/population.csv")
 88 |     df.to_sql(con=engine, schema="default", name="abcxyz", method="multi", index=False)
 89 | 
 90 |     print(df)
 91 | 
 92 | 
 93 | def trino_pandas_read(engine: Engine):
 94 |     connection: Connection = engine.connect()
 95 |     df = pd.read_sql("SELECT * FROM public.foobar", connection)
 96 | 
 97 |     print(df)
 98 | ```
 99 | 
100 | **Note**: in `df.to_sql` the following params are required:
101 | * `index=False`, because indexes are not supported in Trino.
102 | * `method="multi"`: currently `method=None` (the default) does not work, because the Trino DBAPI does not yet support [`executemany`](https://github.com/trinodb/trino-python-client/blob/77adbc48cd5061b2c55e56225d67dd7822284b73/trino/dbapi.py#L410-L411).
103 | 
--------------------------------------------------------------------------------
/conftest.py:
--------------------------------------------------------------------------------
1 | import tests.assertions  # noqa
2 | 
--------------------------------------------------------------------------------
/hack/Dockerfile.hadoop:
--------------------------------------------------------------------------------
 1 | FROM alpine:3.10 AS downloader
 2 | 
 3 | WORKDIR /build
 4 | RUN apk add -U curl gnupg tar
 5 | 
 6 | # Main Apache distributions:
 7 | # * https://apache.org/dist
 8 | # * https://archive.apache.org/dist
 9 | # * https://dist.apache.org/repos/dist/release
10 | # List all Apache mirrors:
11 | # * https://apache.org/mirrors
12 | ARG APACHE_DIST=https://archive.apache.org/dist
13 | ARG APACHE_MIRROR=${APACHE_DIST}
14 | ARG HADOOP_VERSION=3.3.0
15 | 
16 | RUN set -eux; \
17 |     curl -L "${APACHE_DIST}/hadoop/common/KEYS" | gpg --batch --import -; \
18 |     curl -LO "${APACHE_MIRROR}/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz"; \
19 |     curl -L "${APACHE_DIST}/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz.asc" \
20 |         | gpg --batch --verify - "hadoop-${HADOOP_VERSION}.tar.gz";
21 | RUN tar -xf "hadoop-${HADOOP_VERSION}.tar.gz" --no-same-owner \
22 |         --exclude="hadoop-*/share/doc"; \
23 |     mv "hadoop-${HADOOP_VERSION}" "hadoop";
24 | 
25 | 
26 | 
27 | FROM ubuntu:focal
28 | LABEL maintainer="Dũng Đặng Minh "
29 | SHELL [ "/bin/bash", "-c" ]
30 | 
31 | RUN set -eux; \
32 |     apt-get update; \
33 |     apt-get install -y --no-install-recommends \
34 |         openjdk-8-jre-headless ca-certificates libc6 \
35 |         libbz2-1.0 liblz4-1 libsnappy1v5 zlib1g libzstd1 \
36 |         libssl1.1 libisal2 libnss3 libpam-modules krb5-user procps; \
37 |     ln -s libcrypto.so.1.1 /usr/lib/x86_64-linux-gnu/libcrypto.so; \
38 |     ln -s libssl.so.1.1 /usr/lib/x86_64-linux-gnu/libssl.so; \
39 |     apt-get clean; \
40 |     rm -rf /var/lib/apt/lists/*
41 | 
42 | # TODO: Native Hadoop Library
43 | # > hadoop checknative -a
44 | # * libbz2-1.0 liblz4-1 libsnappy1v5 zlib1g libzstd1
45 | # * libssl1.1
46 | #   ln -s libcrypto.so.1.1 /usr/lib/x86_64-linux-gnu/libcrypto.so
47 | #   ln -s libssl.so.1.1 /usr/lib/x86_64-linux-gnu/libssl.so
48 | # * libisal2
49 | #   ln -s libisal.so.2 /usr/lib/x86_64-linux-gnu/libisal.so
50 | 
51 | ENV JAVA_HOME="/usr/lib/jvm/java-8-openjdk-amd64" \
52 |     HADOOP_HOME="/opt/hadoop"
53 | 
54 | COPY --from=downloader "/build/hadoop" "${HADOOP_HOME}"
55 | 
56 | ENV PATH="${HADOOP_HOME}/bin:${PATH}" \
57 |     LD_LIBRARY_PATH="${HADOOP_HOME}/lib/native:${LD_LIBRARY_PATH}"
58 | 
--------------------------------------------------------------------------------
/hack/Dockerfile.hive:
--------------------------------------------------------------------------------
 1 | ARG HADOOP_VERSION=3.1.0
 2 | 
 3 | FROM alpine:3.10 AS downloader
 4 | 
 5 | WORKDIR /build
 6 | RUN apk add -U curl gnupg tar
 7 | 
 8 | # Main Apache distributions:
 9 | # * https://apache.org/dist
10 | # * https://archive.apache.org/dist
11 | # * https://dist.apache.org/repos/dist/release
12 | # List all Apache mirrors:
13 | # * https://apache.org/mirrors
14 | ARG APACHE_DIST=https://archive.apache.org/dist
15 | ARG APACHE_MIRROR=${APACHE_DIST}
16 | ARG HIVE_VERSION=3.1.2
17 | 
18 | RUN set -eux; \
19 |     curl -L "${APACHE_DIST}/hive/KEYS" | gpg --batch --import -; \
20 |     curl -LO "${APACHE_MIRROR}/hive/hive-${HIVE_VERSION}/apache-hive-${HIVE_VERSION}-bin.tar.gz"; \
21 |     curl -L "${APACHE_DIST}/hive/hive-${HIVE_VERSION}/apache-hive-${HIVE_VERSION}-bin.tar.gz.asc" \
22 |         | gpg --batch --verify - "apache-hive-${HIVE_VERSION}-bin.tar.gz";
23 | RUN tar -xf "apache-hive-${HIVE_VERSION}-bin.tar.gz" --no-same-owner; \
24 |     mv "apache-hive-${HIVE_VERSION}-bin" "hive";
25 | 
26 | 
27 | 
28 | FROM dungdm93/hadoop:${HADOOP_VERSION}
29 | 
30 | # Tools
31 | RUN set -eux; \
32 |     apt-get update; \
33 |     apt-get install -y netcat; \
34 |     apt-get clean; \
35 |     rm -rf /var/lib/apt/lists/*
36 | 
37 | ENV HIVE_HOME="/opt/hive" \
38 |     PATH="/opt/hive/bin:${PATH}"
39 | 
40 | COPY --from=downloader "/build/hive" "${HIVE_HOME}"
41 | 
--------------------------------------------------------------------------------
/hack/Dockerfile.superset:
--------------------------------------------------------------------------------
 1 | FROM python:3.8-slim AS base
 2 | LABEL maintainer="Teko's DataOps Team "
 3 | SHELL ["/bin/bash", "-c"]
 4 | 
 5 | RUN set -eux; \
 6 |     apt-get update; \
 7 |     apt-get install -y --no-install-recommends \
 8 |         libbz2-1.0 liblz4-1 libsnappy1v5 zlib1g libzstd1 \
 9 |         libev4 libssl1.1 libisal2 libnss3 \
10 |         libpq5 libmariadb3 \
11 |         curl locales; \
12 |     \
13 |     apt-get clean; \
14 |     rm -rf /var/lib/apt/lists/*
15 | 
16 | # Firefox + Gecko driver. For Ubuntu, use `apt install firefox firefox-geckodriver`.
17 | ARG GECKO_DRIVER_VERSION=v0.29.0
18 | RUN set -eux; cd /tmp/; \
19 |     apt-get update; \
20 |     apt-get install -y --no-install-recommends \
21 |         xvfb libxi6 libgconf-2-4 \
22 |         firefox-esr; \
23 |     apt-get clean; \
24 |     rm -rf /var/lib/apt/lists/*; \
25 |     curl -sSL "https://github.com/mozilla/geckodriver/releases/download/${GECKO_DRIVER_VERSION}/geckodriver-${GECKO_DRIVER_VERSION}-linux64.tar.gz" \
26 |         | tar -xzf - -C /usr/local/bin --no-same-owner;
27 | 
28 | RUN set -eux; \
29 |     sed -i 's/^# en_US.UTF-8 UTF-8$/en_US.UTF-8 UTF-8/g' /etc/locale.gen; \
30 |     locale-gen; \
31 |     update-locale LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8;
32 | 
33 | ENV SUPERSET_HOME="/opt/superset"
34 | RUN set -eux; \
35 |     useradd -ms "/bin/bash" --uid=1000 superset; \
36 |     mkdir -p "${SUPERSET_HOME}"; \
37 |     chown -R superset: "${SUPERSET_HOME}";
38 | 
39 | WORKDIR ${SUPERSET_HOME}
40 | 
41 | FROM base AS builder
42 | 
43 | ARG SUPERSET_VERSION=1.0.1
44 | 
45 | RUN set -eux; \
46 |     apt-get update; \
47 |     apt-get install -y \
48 |         build-essential \
49 |         # pip install pyodbc
50 |         unixodbc-dev \
51 |         # pip install mysqlclient
52 |         default-libmysqlclient-dev \
53 |         # pip install pylibmc
54 |         libmemcached-dev \
55 |         # pip install sasl
56 |         libsasl2-dev;
57 | 
58 | RUN set -eux; \
59 |     function join { local IFS="$1"; echo "${*:2}"; }; \
60 |     SUPERSET_PACKAGES=( \
61 |         # Cloud
62 |         athena bigquery redshift \
63 |         dremio snowflake teradata vertica exasol \
64 |         # Database
65 |         mysql postgres mssql oracle db2 hana \
66 |         clickhouse cockroachdb elasticsearch \
67 |         excel gsheets \
68 |         # Big Data
69 |         drill druid hive impala kylin pinot presto \
70 |         # Others
71 |         cors thumbnails \
72 |     ); \
73 |     pip install "apache-superset[$(join ',' ${SUPERSET_PACKAGES[@]})]==${SUPERSET_VERSION}" \
74 |         "gunicorn[gevent,eventlet]" "flower~=0.9" "authlib~=0.15" "redis~=3.5" "pylibmc~=1.6"; \
75 |     \
76 |     rm -rf /usr/local/cx_Oracle-doc;
77 | 
78 | FROM base
79 | 
80 | COPY --from=builder /usr/local /usr/local
81 | COPY ./scripts/superset/* /usr/local/bin/
82 | RUN mv /usr/local/bin/trino.py /usr/local/lib/python3.8/site-packages/superset/db_engine_specs/
83 | 
84 | USER superset
85 | EXPOSE 8088 5555
86 | ENTRYPOINT [ "/usr/local/bin/docker-entrypoint.sh" ]
87 | 
--------------------------------------------------------------------------------
/hack/configs/hive/hive-site.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?>
 2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 3 | <configuration>
 4 |   <property>
 5 |     <name>hive.metastore.db.type</name>
 6 |     <value>postgres</value>
 7 |   </property>
 8 | 
 9 |   <property>
10 |     <name>javax.jdo.option.ConnectionURL</name>
11 |     <value>jdbc:postgresql://postgres:5432/postgres</value>
12 |   </property>
13 | 
14 |   <property>
15 |     <name>javax.jdo.option.ConnectionDriverName</name>
16 |     <value>org.postgresql.Driver</value>
17 |   </property>
18 | 
19 |   <property>
20 |     <name>javax.jdo.option.ConnectionUserName</name>
21 |     <value>postgres</value>
22 |   </property>
23 | 
24 |   <property>
25 |     <name>javax.jdo.option.ConnectionPassword</name>
26 |     <value>SuperSecr3t</value>
27 |   </property>
28 | 
29 |   <property>
30 |     <name>hive.metastore.schema.verification</name>
31 |     <value>true</value>
32 |   </property>
33 | 
34 |   <property>
35 |     <name>hive.metastore.warehouse.dir</name>
36 |     <value>file:///data/warehouse/</value>
37 |   </property>
38 | 
39 |   <property>
40 |     <name>hive.metastore.uris</name>
41 |     <value>thrift://hive-metastore:9083</value>
42 |   </property>
43 | 
44 |   <property>
45 |     <name>hive.metastore.event.db.notification.api.auth</name>
46 |     <value>false</value>
47 |   </property>
48 | 
49 |   <property>
50 |     <name>hive.default.fileformat</name>
51 |     <value>parquet</value>
52 |   </property>
53 | </configuration>
--------------------------------------------------------------------------------
/hack/configs/superset/gunicorn.conf.py:
--------------------------------------------------------------------------------
1 | accesslog = '-'  # stdout
2 | errorlog = '-'   # stderr
3 | 
4 | workers = 4
5 | worker_class = 'gevent'
6 | worker_connections = 1000
7 | 
--------------------------------------------------------------------------------
/hack/configs/superset/superset_config.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import os
 3 | 
 4 | logging.basicConfig(level=logging.DEBUG, format='[%(asctime)s] [%(levelname)-5s] %(name)-15s:%(lineno)d: %(message)s')
 5 | 
 6 | SUPERSET_WEBSERVER_PROTOCOL = 'http'
 7 | SUPERSET_WEBSERVER_ADDRESS = '0.0.0.0'
 8 | SUPERSET_WEBSERVER_PORT = 8088
 9 | ENABLE_PROXY_FIX = True
10 | SQLALCHEMY_DATABASE_URI = 'postgresql://postgres:SuperSecr3t@postgres:5432/postgres'
11 | 
12 | if "SUPERSET_HOME" in os.environ:
13 |     DATA_DIR = os.environ["SUPERSET_HOME"]
14 | else:
15 |     DATA_DIR = os.path.join(os.path.expanduser("~"), ".superset")
16 | 
17 | UPLOAD_FOLDER = DATA_DIR + "/app/static/uploads/"
18 | IMG_UPLOAD_FOLDER = DATA_DIR + "/app/static/uploads/"
19 | 
--------------------------------------------------------------------------------
/hack/configs/trino/catalog/hive.properties:
--------------------------------------------------------------------------------
1 | connector.name=hive
2 | hive.metastore.uri=thrift://hive-metastore:9083
3 | hive.metastore-timeout=1m
4 | hive.storage-format=parquet
5 | hive.security=allow-all
6 | 
--------------------------------------------------------------------------------
/hack/configs/trino/catalog/mysql.properties:
--------------------------------------------------------------------------------
1 | connector.name=mysql
2 | connection-url=jdbc:mysql://mysql:3306
3 | connection-user=root
4 | connection-password=SuperSecr3t
5 | allow-drop-table=true
6 | 
--------------------------------------------------------------------------------
/hack/configs/trino/catalog/postgresql.properties:
--------------------------------------------------------------------------------
1 | connector.name=postgresql
2 | connection-url=jdbc:postgresql://postgres:5432/dvdrental
3 | connection-user=postgres
4 | connection-password=SuperSecr3t
5 | postgresql.array-mapping=AS_ARRAY
6 | allow-drop-table=true
7 | 
--------------------------------------------------------------------------------
/hack/configs/trino/config.properties:
--------------------------------------------------------------------------------
 1 | coordinator=true
 2 | node-scheduler.include-coordinator=true
 3 | http-server.http.port=8080
 4 | query.max-memory=5GB
 5 | query.max-total-memory=10GB
 6 | query.max-memory-per-node=1GB
 7 | query.max-total-memory-per-node=2GB
 8 | discovery-server.enabled=true
 9 | discovery.uri=http://localhost:8080
10 | 
--------------------------------------------------------------------------------
/hack/configs/trino/jvm.config:
--------------------------------------------------------------------------------
 1 | -server
 2 | -Xmx16G
 3 | -XX:-UseBiasedLocking
 4 | -XX:+UseG1GC
 5 | -XX:G1HeapRegionSize=32M
 6 | -XX:+ExplicitGCInvokesConcurrent
 7 | -XX:+HeapDumpOnOutOfMemoryError
 8 | -XX:+UseGCOverheadLimit
 9 | -XX:+ExitOnOutOfMemoryError
10 | -XX:ReservedCodeCacheSize=256M
11 | -Djdk.attach.allowAttachSelf=true
12 | -Djdk.nio.maxCachedBufferSize=2000000
13 | 
--------------------------------------------------------------------------------
/hack/configs/trino/log.properties:
--------------------------------------------------------------------------------
1 | # Enable verbose logging from Trino
2 | #io.trino=DEBUG
3 | 
--------------------------------------------------------------------------------
/hack/configs/trino/node.properties:
-------------------------------------------------------------------------------- 1 | node.environment=docker 2 | node.data-dir=/data/trino 3 | plugin.dir=/usr/lib/trino/plugin 4 | -------------------------------------------------------------------------------- /hack/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.7" 2 | 3 | services: 4 | ##### MySQL ##### 5 | # https://hub.docker.com/_/mysql 6 | # Help: 7 | # > mysql -u root -p 8 | mysql: 9 | image: mysql:8.0 10 | command: [--default-authentication-plugin=mysql_native_password] 11 | environment: 12 | MYSQL_ROOT_PASSWORD: SuperSecr3t 13 | ports: 14 | - 3306:3306 15 | - 33060:33060 16 | volumes: 17 | - ./samples/mysql/:/docker-entrypoint-initdb.d/ 18 | - mysql_data:/var/lib/mysql 19 | 20 | ##### PostgreSQL ##### 21 | # https://hub.docker.com/_/postgres 22 | # Help: 23 | # > psql -U postgres [-W] 24 | postgres: 25 | image: postgres:13-alpine 26 | environment: 27 | POSTGRES_PASSWORD: SuperSecr3t 28 | ports: 29 | - 5432:5432 30 | volumes: 31 | - ./samples/postgres/:/docker-entrypoint-initdb.d/ 32 | - postgres_data:/var/lib/postgresql/data 33 | 34 | hive-metastore: 35 | image: dungdm93/hive:3.1.2 36 | # HOTFIX: Illegal character in hostname at index 13: thrift://hive_hive-metastore_1.bookshelf:9083 37 | # Caused at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.resolveUris(HiveMetaStoreClient.java:267) 38 | container_name: hive-metastore 39 | entrypoint: ["/usr/local/scripts/metastore-entrypoint.sh"] 40 | depends_on: 41 | - postgres 42 | environment: 43 | HADOOP_OPTIONAL_TOOLS: hadoop-aws 44 | ports: 45 | - 9083:9083 46 | volumes: 47 | - ./scripts/hive/:/usr/local/scripts/ 48 | - ./configs/hive/:/opt/hive/conf/ 49 | - warehouse_data:/data/warehouse 50 | 51 | hive-server2: 52 | image: dungdm93/hive:3.1.2 53 | entrypoint: ["/usr/local/scripts/hiveserver2-entrypoint.sh"] 54 | depends_on: 55 | - hive-metastore 56 | environment: 57 | HADOOP_OPTIONAL_TOOLS: hadoop-aws 58 | ports: 59 | - 10000:10000 60 | - 10002:10002 # WebUI 61 | volumes: 62 | - ./scripts/hive/:/usr/local/scripts/ 63 | - ./configs/hive/:/opt/hive/conf/ 64 | - warehouse_data:/data/warehouse 65 | 66 | ##### TrinoDB ##### 67 | # https://hub.docker.com/r/trinodb/trino 68 | # Help: 69 | # > trino --server=localhost:8080 [--catalog=mysql] [--schema=sakila] 70 | trino: 71 | image: trinodb/trino:362 72 | ports: 73 | - 8080:8080 74 | volumes: 75 | - ./configs/trino/:/etc/trino/ 76 | - warehouse_data:/data/warehouse 77 | 78 | ##### Apache SuperSet ##### 79 | # Create an admin user 80 | # > superset fab create-admin 81 | # Load some data to play with 82 | # > superset load_examples 83 | # Create default roles and permissions 84 | # > superset init 85 | superset: 86 | image: dungdm93/superset:1.0 87 | command: [webserver] 88 | environment: 89 | SUPERSET_CONFIG_PATH: /etc/superset/superset_config.py 90 | CELERY_CONFIG_PATH: /etc/superset/celeryconfig.py 91 | PYTHONPATH: /app/ 92 | ports: 93 | - 8088:8088 94 | volumes: 95 | - ./configs/superset:/etc/superset 96 | - ../:/app/ 97 | 98 | volumes: 99 | mysql_data: 100 | postgres_data: 101 | warehouse_data: 102 | 103 | networks: 104 | default: 105 | name: bookshelf 106 | -------------------------------------------------------------------------------- /hack/release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eux 3 | 4 | python setup.py sdist bdist_egg bdist_wheel 5 | 6 | twine check ./dist/* 7 | twine upload 
./dist/* 8 | -------------------------------------------------------------------------------- /hack/samples/mysql/1-sakila-schema.sql: -------------------------------------------------------------------------------- 1 | -- Sakila Sample Database Schema 2 | -- Version 1.2 3 | 4 | -- Copyright (c) 2006, 2019, Oracle and/or its affiliates. 5 | -- All rights reserved. 6 | 7 | -- Redistribution and use in source and binary forms, with or without 8 | -- modification, are permitted provided that the following conditions are 9 | -- met: 10 | 11 | -- * Redistributions of source code must retain the above copyright notice, 12 | -- this list of conditions and the following disclaimer. 13 | -- * Redistributions in binary form must reproduce the above copyright 14 | -- notice, this list of conditions and the following disclaimer in the 15 | -- documentation and/or other materials provided with the distribution. 16 | -- * Neither the name of Oracle nor the names of its contributors may be used 17 | -- to endorse or promote products derived from this software without 18 | -- specific prior written permission. 19 | 20 | -- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 21 | -- IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 22 | -- THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 | -- PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 24 | -- CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 25 | -- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 26 | -- PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 27 | -- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 28 | -- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 29 | -- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 | -- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
31 | 32 | SET NAMES utf8mb4; 33 | SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0; 34 | SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0; 35 | SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='TRADITIONAL'; 36 | 37 | DROP SCHEMA IF EXISTS sakila; 38 | CREATE SCHEMA sakila; 39 | USE sakila; 40 | 41 | -- 42 | -- Table structure for table `actor` 43 | -- 44 | 45 | CREATE TABLE actor ( 46 | actor_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, 47 | first_name VARCHAR(45) NOT NULL, 48 | last_name VARCHAR(45) NOT NULL, 49 | last_update TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, 50 | PRIMARY KEY (actor_id), 51 | KEY idx_actor_last_name (last_name) 52 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; 53 | 54 | -- 55 | -- Table structure for table `address` 56 | -- 57 | 58 | CREATE TABLE address ( 59 | address_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, 60 | address VARCHAR(50) NOT NULL, 61 | address2 VARCHAR(50) DEFAULT NULL, 62 | district VARCHAR(20) NOT NULL, 63 | city_id SMALLINT UNSIGNED NOT NULL, 64 | postal_code VARCHAR(10) DEFAULT NULL, 65 | phone VARCHAR(20) NOT NULL, 66 | -- Add GEOMETRY column for MySQL 5.7.5 and higher 67 | -- Also include SRID attribute for MySQL 8.0.3 and higher 68 | /*!50705 location GEOMETRY */ /*!80003 SRID 0 */ /*!50705 NOT NULL,*/ 69 | last_update TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, 70 | PRIMARY KEY (address_id), 71 | KEY idx_fk_city_id (city_id), 72 | /*!50705 SPATIAL KEY `idx_location` (location),*/ 73 | CONSTRAINT `fk_address_city` FOREIGN KEY (city_id) REFERENCES city (city_id) ON DELETE RESTRICT ON UPDATE CASCADE 74 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; 75 | 76 | -- 77 | -- Table structure for table `category` 78 | -- 79 | 80 | CREATE TABLE category ( 81 | category_id TINYINT UNSIGNED NOT NULL AUTO_INCREMENT, 82 | name VARCHAR(25) NOT NULL, 83 | last_update TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, 84 | PRIMARY KEY (category_id) 85 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; 86 | 87 | -- 88 | -- Table structure for table `city` 89 | -- 90 | 91 | CREATE TABLE city ( 92 | city_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, 93 | city VARCHAR(50) NOT NULL, 94 | country_id SMALLINT UNSIGNED NOT NULL, 95 | last_update TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, 96 | PRIMARY KEY (city_id), 97 | KEY idx_fk_country_id (country_id), 98 | CONSTRAINT `fk_city_country` FOREIGN KEY (country_id) REFERENCES country (country_id) ON DELETE RESTRICT ON UPDATE CASCADE 99 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; 100 | 101 | -- 102 | -- Table structure for table `country` 103 | -- 104 | 105 | CREATE TABLE country ( 106 | country_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, 107 | country VARCHAR(50) NOT NULL, 108 | last_update TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, 109 | PRIMARY KEY (country_id) 110 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; 111 | 112 | -- 113 | -- Table structure for table `customer` 114 | -- 115 | 116 | CREATE TABLE customer ( 117 | customer_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, 118 | store_id TINYINT UNSIGNED NOT NULL, 119 | first_name VARCHAR(45) NOT NULL, 120 | last_name VARCHAR(45) NOT NULL, 121 | email VARCHAR(50) DEFAULT NULL, 122 | address_id SMALLINT UNSIGNED NOT NULL, 123 | active BOOLEAN NOT NULL DEFAULT TRUE, 124 | create_date DATETIME NOT NULL, 125 | last_update TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, 126 | PRIMARY KEY (customer_id), 
127 | KEY idx_fk_store_id (store_id), 128 | KEY idx_fk_address_id (address_id), 129 | KEY idx_last_name (last_name), 130 | CONSTRAINT fk_customer_address FOREIGN KEY (address_id) REFERENCES address (address_id) ON DELETE RESTRICT ON UPDATE CASCADE, 131 | CONSTRAINT fk_customer_store FOREIGN KEY (store_id) REFERENCES store (store_id) ON DELETE RESTRICT ON UPDATE CASCADE 132 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; 133 | 134 | -- 135 | -- Table structure for table `film` 136 | -- 137 | 138 | CREATE TABLE film ( 139 | film_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, 140 | title VARCHAR(128) NOT NULL, 141 | description TEXT DEFAULT NULL, 142 | release_year YEAR DEFAULT NULL, 143 | language_id TINYINT UNSIGNED NOT NULL, 144 | original_language_id TINYINT UNSIGNED DEFAULT NULL, 145 | rental_duration TINYINT UNSIGNED NOT NULL DEFAULT 3, 146 | rental_rate DECIMAL(4,2) NOT NULL DEFAULT 4.99, 147 | length SMALLINT UNSIGNED DEFAULT NULL, 148 | replacement_cost DECIMAL(5,2) NOT NULL DEFAULT 19.99, 149 | rating ENUM('G','PG','PG-13','R','NC-17') DEFAULT 'G', 150 | special_features SET('Trailers','Commentaries','Deleted Scenes','Behind the Scenes') DEFAULT NULL, 151 | last_update TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, 152 | PRIMARY KEY (film_id), 153 | KEY idx_title (title), 154 | KEY idx_fk_language_id (language_id), 155 | KEY idx_fk_original_language_id (original_language_id), 156 | CONSTRAINT fk_film_language FOREIGN KEY (language_id) REFERENCES language (language_id) ON DELETE RESTRICT ON UPDATE CASCADE, 157 | CONSTRAINT fk_film_language_original FOREIGN KEY (original_language_id) REFERENCES language (language_id) ON DELETE RESTRICT ON UPDATE CASCADE 158 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; 159 | 160 | -- 161 | -- Table structure for table `film_actor` 162 | -- 163 | 164 | CREATE TABLE film_actor ( 165 | actor_id SMALLINT UNSIGNED NOT NULL, 166 | film_id SMALLINT UNSIGNED NOT NULL, 167 | last_update TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, 168 | PRIMARY KEY (actor_id,film_id), 169 | KEY idx_fk_film_id (`film_id`), 170 | CONSTRAINT fk_film_actor_actor FOREIGN KEY (actor_id) REFERENCES actor (actor_id) ON DELETE RESTRICT ON UPDATE CASCADE, 171 | CONSTRAINT fk_film_actor_film FOREIGN KEY (film_id) REFERENCES film (film_id) ON DELETE RESTRICT ON UPDATE CASCADE 172 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; 173 | 174 | -- 175 | -- Table structure for table `film_category` 176 | -- 177 | 178 | CREATE TABLE film_category ( 179 | film_id SMALLINT UNSIGNED NOT NULL, 180 | category_id TINYINT UNSIGNED NOT NULL, 181 | last_update TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, 182 | PRIMARY KEY (film_id, category_id), 183 | CONSTRAINT fk_film_category_film FOREIGN KEY (film_id) REFERENCES film (film_id) ON DELETE RESTRICT ON UPDATE CASCADE, 184 | CONSTRAINT fk_film_category_category FOREIGN KEY (category_id) REFERENCES category (category_id) ON DELETE RESTRICT ON UPDATE CASCADE 185 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; 186 | 187 | -- 188 | -- Table structure for table `film_text` 189 | -- 190 | -- InnoDB added FULLTEXT support in 5.6.10. If you use an 191 | -- earlier version, then consider upgrading (recommended) or 192 | -- changing InnoDB to MyISAM as the film_text engine 193 | -- 194 | 195 | -- Use InnoDB for film_text as of 5.6.10, MyISAM prior to 5.6.10. 
196 | SET @old_default_storage_engine = @@default_storage_engine; 197 | SET @@default_storage_engine = 'MyISAM'; 198 | /*!50610 SET @@default_storage_engine = 'InnoDB'*/; 199 | 200 | CREATE TABLE film_text ( 201 | film_id SMALLINT NOT NULL, 202 | title VARCHAR(255) NOT NULL, 203 | description TEXT, 204 | PRIMARY KEY (film_id), 205 | FULLTEXT KEY idx_title_description (title,description) 206 | ) DEFAULT CHARSET=utf8mb4; 207 | 208 | SET @@default_storage_engine = @old_default_storage_engine; 209 | 210 | -- 211 | -- Triggers for loading film_text from film 212 | -- 213 | 214 | DELIMITER ;; 215 | CREATE TRIGGER `ins_film` AFTER INSERT ON `film` FOR EACH ROW BEGIN 216 | INSERT INTO film_text (film_id, title, description) 217 | VALUES (new.film_id, new.title, new.description); 218 | END;; 219 | 220 | 221 | CREATE TRIGGER `upd_film` AFTER UPDATE ON `film` FOR EACH ROW BEGIN 222 | IF (old.title != new.title) OR (old.description != new.description) OR (old.film_id != new.film_id) 223 | THEN 224 | UPDATE film_text 225 | SET title=new.title, 226 | description=new.description, 227 | film_id=new.film_id 228 | WHERE film_id=old.film_id; 229 | END IF; 230 | END;; 231 | 232 | 233 | CREATE TRIGGER `del_film` AFTER DELETE ON `film` FOR EACH ROW BEGIN 234 | DELETE FROM film_text WHERE film_id = old.film_id; 235 | END;; 236 | 237 | DELIMITER ; 238 | 239 | -- 240 | -- Table structure for table `inventory` 241 | -- 242 | 243 | CREATE TABLE inventory ( 244 | inventory_id MEDIUMINT UNSIGNED NOT NULL AUTO_INCREMENT, 245 | film_id SMALLINT UNSIGNED NOT NULL, 246 | store_id TINYINT UNSIGNED NOT NULL, 247 | last_update TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, 248 | PRIMARY KEY (inventory_id), 249 | KEY idx_fk_film_id (film_id), 250 | KEY idx_store_id_film_id (store_id,film_id), 251 | CONSTRAINT fk_inventory_store FOREIGN KEY (store_id) REFERENCES store (store_id) ON DELETE RESTRICT ON UPDATE CASCADE, 252 | CONSTRAINT fk_inventory_film FOREIGN KEY (film_id) REFERENCES film (film_id) ON DELETE RESTRICT ON UPDATE CASCADE 253 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; 254 | 255 | -- 256 | -- Table structure for table `language` 257 | -- 258 | 259 | CREATE TABLE language ( 260 | language_id TINYINT UNSIGNED NOT NULL AUTO_INCREMENT, 261 | name CHAR(20) NOT NULL, 262 | last_update TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, 263 | PRIMARY KEY (language_id) 264 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; 265 | 266 | -- 267 | -- Table structure for table `payment` 268 | -- 269 | 270 | CREATE TABLE payment ( 271 | payment_id SMALLINT UNSIGNED NOT NULL AUTO_INCREMENT, 272 | customer_id SMALLINT UNSIGNED NOT NULL, 273 | staff_id TINYINT UNSIGNED NOT NULL, 274 | rental_id INT DEFAULT NULL, 275 | amount DECIMAL(5,2) NOT NULL, 276 | payment_date DATETIME NOT NULL, 277 | last_update TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, 278 | PRIMARY KEY (payment_id), 279 | KEY idx_fk_staff_id (staff_id), 280 | KEY idx_fk_customer_id (customer_id), 281 | CONSTRAINT fk_payment_rental FOREIGN KEY (rental_id) REFERENCES rental (rental_id) ON DELETE SET NULL ON UPDATE CASCADE, 282 | CONSTRAINT fk_payment_customer FOREIGN KEY (customer_id) REFERENCES customer (customer_id) ON DELETE RESTRICT ON UPDATE CASCADE, 283 | CONSTRAINT fk_payment_staff FOREIGN KEY (staff_id) REFERENCES staff (staff_id) ON DELETE RESTRICT ON UPDATE CASCADE 284 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; 285 | 286 | 287 | -- 288 | -- Table structure for table `rental` 289 | -- 290 | 291 | CREATE 
TABLE rental ( 292 | rental_id INT NOT NULL AUTO_INCREMENT, 293 | rental_date DATETIME NOT NULL, 294 | inventory_id MEDIUMINT UNSIGNED NOT NULL, 295 | customer_id SMALLINT UNSIGNED NOT NULL, 296 | return_date DATETIME DEFAULT NULL, 297 | staff_id TINYINT UNSIGNED NOT NULL, 298 | last_update TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, 299 | PRIMARY KEY (rental_id), 300 | UNIQUE KEY (rental_date,inventory_id,customer_id), 301 | KEY idx_fk_inventory_id (inventory_id), 302 | KEY idx_fk_customer_id (customer_id), 303 | KEY idx_fk_staff_id (staff_id), 304 | CONSTRAINT fk_rental_staff FOREIGN KEY (staff_id) REFERENCES staff (staff_id) ON DELETE RESTRICT ON UPDATE CASCADE, 305 | CONSTRAINT fk_rental_inventory FOREIGN KEY (inventory_id) REFERENCES inventory (inventory_id) ON DELETE RESTRICT ON UPDATE CASCADE, 306 | CONSTRAINT fk_rental_customer FOREIGN KEY (customer_id) REFERENCES customer (customer_id) ON DELETE RESTRICT ON UPDATE CASCADE 307 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; 308 | 309 | -- 310 | -- Table structure for table `staff` 311 | -- 312 | 313 | CREATE TABLE staff ( 314 | staff_id TINYINT UNSIGNED NOT NULL AUTO_INCREMENT, 315 | first_name VARCHAR(45) NOT NULL, 316 | last_name VARCHAR(45) NOT NULL, 317 | address_id SMALLINT UNSIGNED NOT NULL, 318 | picture BLOB DEFAULT NULL, 319 | email VARCHAR(50) DEFAULT NULL, 320 | store_id TINYINT UNSIGNED NOT NULL, 321 | active BOOLEAN NOT NULL DEFAULT TRUE, 322 | username VARCHAR(16) NOT NULL, 323 | password VARCHAR(40) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL, 324 | last_update TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, 325 | PRIMARY KEY (staff_id), 326 | KEY idx_fk_store_id (store_id), 327 | KEY idx_fk_address_id (address_id), 328 | CONSTRAINT fk_staff_store FOREIGN KEY (store_id) REFERENCES store (store_id) ON DELETE RESTRICT ON UPDATE CASCADE, 329 | CONSTRAINT fk_staff_address FOREIGN KEY (address_id) REFERENCES address (address_id) ON DELETE RESTRICT ON UPDATE CASCADE 330 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; 331 | 332 | -- 333 | -- Table structure for table `store` 334 | -- 335 | 336 | CREATE TABLE store ( 337 | store_id TINYINT UNSIGNED NOT NULL AUTO_INCREMENT, 338 | manager_staff_id TINYINT UNSIGNED NOT NULL, 339 | address_id SMALLINT UNSIGNED NOT NULL, 340 | last_update TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, 341 | PRIMARY KEY (store_id), 342 | UNIQUE KEY idx_unique_manager (manager_staff_id), 343 | KEY idx_fk_address_id (address_id), 344 | CONSTRAINT fk_store_staff FOREIGN KEY (manager_staff_id) REFERENCES staff (staff_id) ON DELETE RESTRICT ON UPDATE CASCADE, 345 | CONSTRAINT fk_store_address FOREIGN KEY (address_id) REFERENCES address (address_id) ON DELETE RESTRICT ON UPDATE CASCADE 346 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; 347 | 348 | -- 349 | -- View structure for view `customer_list` 350 | -- 351 | 352 | CREATE VIEW customer_list 353 | AS 354 | SELECT cu.customer_id AS ID, CONCAT(cu.first_name, _utf8mb4' ', cu.last_name) AS name, a.address AS address, a.postal_code AS `zip code`, 355 | a.phone AS phone, city.city AS city, country.country AS country, IF(cu.active, _utf8mb4'active',_utf8mb4'') AS notes, cu.store_id AS SID 356 | FROM customer AS cu JOIN address AS a ON cu.address_id = a.address_id JOIN city ON a.city_id = city.city_id 357 | JOIN country ON city.country_id = country.country_id; 358 | 359 | -- 360 | -- View structure for view `film_list` 361 | -- 362 | 363 | CREATE VIEW film_list 364 | AS 365 | 
SELECT film.film_id AS FID, film.title AS title, film.description AS description, category.name AS category, film.rental_rate AS price, 366 | film.length AS length, film.rating AS rating, GROUP_CONCAT(CONCAT(actor.first_name, _utf8mb4' ', actor.last_name) SEPARATOR ', ') AS actors 367 | FROM category LEFT JOIN film_category ON category.category_id = film_category.category_id LEFT JOIN film ON film_category.film_id = film.film_id 368 | JOIN film_actor ON film.film_id = film_actor.film_id 369 | JOIN actor ON film_actor.actor_id = actor.actor_id 370 | GROUP BY film.film_id, category.name; 371 | 372 | -- 373 | -- View structure for view `nicer_but_slower_film_list` 374 | -- 375 | 376 | CREATE VIEW nicer_but_slower_film_list 377 | AS 378 | SELECT film.film_id AS FID, film.title AS title, film.description AS description, category.name AS category, film.rental_rate AS price, 379 | film.length AS length, film.rating AS rating, GROUP_CONCAT(CONCAT(CONCAT(UCASE(SUBSTR(actor.first_name,1,1)), 380 | LCASE(SUBSTR(actor.first_name,2,LENGTH(actor.first_name))),_utf8mb4' ',CONCAT(UCASE(SUBSTR(actor.last_name,1,1)), 381 | LCASE(SUBSTR(actor.last_name,2,LENGTH(actor.last_name)))))) SEPARATOR ', ') AS actors 382 | FROM category LEFT JOIN film_category ON category.category_id = film_category.category_id LEFT JOIN film ON film_category.film_id = film.film_id 383 | JOIN film_actor ON film.film_id = film_actor.film_id 384 | JOIN actor ON film_actor.actor_id = actor.actor_id 385 | GROUP BY film.film_id, category.name; 386 | 387 | -- 388 | -- View structure for view `staff_list` 389 | -- 390 | 391 | CREATE VIEW staff_list 392 | AS 393 | SELECT s.staff_id AS ID, CONCAT(s.first_name, _utf8mb4' ', s.last_name) AS name, a.address AS address, a.postal_code AS `zip code`, a.phone AS phone, 394 | city.city AS city, country.country AS country, s.store_id AS SID 395 | FROM staff AS s JOIN address AS a ON s.address_id = a.address_id JOIN city ON a.city_id = city.city_id 396 | JOIN country ON city.country_id = country.country_id; 397 | 398 | -- 399 | -- View structure for view `sales_by_store` 400 | -- 401 | 402 | CREATE VIEW sales_by_store 403 | AS 404 | SELECT 405 | CONCAT(c.city, _utf8mb4',', cy.country) AS store 406 | , CONCAT(m.first_name, _utf8mb4' ', m.last_name) AS manager 407 | , SUM(p.amount) AS total_sales 408 | FROM payment AS p 409 | INNER JOIN rental AS r ON p.rental_id = r.rental_id 410 | INNER JOIN inventory AS i ON r.inventory_id = i.inventory_id 411 | INNER JOIN store AS s ON i.store_id = s.store_id 412 | INNER JOIN address AS a ON s.address_id = a.address_id 413 | INNER JOIN city AS c ON a.city_id = c.city_id 414 | INNER JOIN country AS cy ON c.country_id = cy.country_id 415 | INNER JOIN staff AS m ON s.manager_staff_id = m.staff_id 416 | GROUP BY s.store_id 417 | ORDER BY cy.country, c.city; 418 | 419 | -- 420 | -- View structure for view `sales_by_film_category` 421 | -- 422 | -- Note that total sales will add up to >100% because 423 | -- some titles belong to more than 1 category 424 | -- 425 | 426 | CREATE VIEW sales_by_film_category 427 | AS 428 | SELECT 429 | c.name AS category 430 | , SUM(p.amount) AS total_sales 431 | FROM payment AS p 432 | INNER JOIN rental AS r ON p.rental_id = r.rental_id 433 | INNER JOIN inventory AS i ON r.inventory_id = i.inventory_id 434 | INNER JOIN film AS f ON i.film_id = f.film_id 435 | INNER JOIN film_category AS fc ON f.film_id = fc.film_id 436 | INNER JOIN category AS c ON fc.category_id = c.category_id 437 | GROUP BY c.name 438 | ORDER BY total_sales DESC; 439 | 440 
| -- 441 | -- View structure for view `actor_info` 442 | -- 443 | 444 | CREATE DEFINER=CURRENT_USER SQL SECURITY INVOKER VIEW actor_info 445 | AS 446 | SELECT 447 | a.actor_id, 448 | a.first_name, 449 | a.last_name, 450 | GROUP_CONCAT(DISTINCT CONCAT(c.name, ': ', 451 | (SELECT GROUP_CONCAT(f.title ORDER BY f.title SEPARATOR ', ') 452 | FROM sakila.film f 453 | INNER JOIN sakila.film_category fc 454 | ON f.film_id = fc.film_id 455 | INNER JOIN sakila.film_actor fa 456 | ON f.film_id = fa.film_id 457 | WHERE fc.category_id = c.category_id 458 | AND fa.actor_id = a.actor_id 459 | ) 460 | ) 461 | ORDER BY c.name SEPARATOR '; ') 462 | AS film_info 463 | FROM sakila.actor a 464 | LEFT JOIN sakila.film_actor fa 465 | ON a.actor_id = fa.actor_id 466 | LEFT JOIN sakila.film_category fc 467 | ON fa.film_id = fc.film_id 468 | LEFT JOIN sakila.category c 469 | ON fc.category_id = c.category_id 470 | GROUP BY a.actor_id, a.first_name, a.last_name; 471 | 472 | -- 473 | -- Procedure structure for procedure `rewards_report` 474 | -- 475 | 476 | DELIMITER // 477 | 478 | CREATE PROCEDURE rewards_report ( 479 | IN min_monthly_purchases TINYINT UNSIGNED 480 | , IN min_dollar_amount_purchased DECIMAL(10,2) 481 | , OUT count_rewardees INT 482 | ) 483 | LANGUAGE SQL 484 | NOT DETERMINISTIC 485 | READS SQL DATA 486 | SQL SECURITY DEFINER 487 | COMMENT 'Provides a customizable report on best customers' 488 | proc: BEGIN 489 | 490 | DECLARE last_month_start DATE; 491 | DECLARE last_month_end DATE; 492 | 493 | /* Some sanity checks... */ 494 | IF min_monthly_purchases = 0 THEN 495 | SELECT 'Minimum monthly purchases parameter must be > 0'; 496 | LEAVE proc; 497 | END IF; 498 | IF min_dollar_amount_purchased = 0.00 THEN 499 | SELECT 'Minimum monthly dollar amount purchased parameter must be > $0.00'; 500 | LEAVE proc; 501 | END IF; 502 | 503 | /* Determine start and end time periods */ 504 | SET last_month_start = DATE_SUB(CURRENT_DATE(), INTERVAL 1 MONTH); 505 | SET last_month_start = STR_TO_DATE(CONCAT(YEAR(last_month_start),'-',MONTH(last_month_start),'-01'),'%Y-%m-%d'); 506 | SET last_month_end = LAST_DAY(last_month_start); 507 | 508 | /* 509 | Create a temporary storage area for 510 | Customer IDs. 511 | */ 512 | CREATE TEMPORARY TABLE tmpCustomer (customer_id SMALLINT UNSIGNED NOT NULL PRIMARY KEY); 513 | 514 | /* 515 | Find all customers meeting the 516 | monthly purchase requirements 517 | */ 518 | INSERT INTO tmpCustomer (customer_id) 519 | SELECT p.customer_id 520 | FROM payment AS p 521 | WHERE DATE(p.payment_date) BETWEEN last_month_start AND last_month_end 522 | GROUP BY customer_id 523 | HAVING SUM(p.amount) > min_dollar_amount_purchased 524 | AND COUNT(customer_id) > min_monthly_purchases; 525 | 526 | /* Populate OUT parameter with count of found customers */ 527 | SELECT COUNT(*) FROM tmpCustomer INTO count_rewardees; 528 | 529 | /* 530 | Output ALL customer information of matching rewardees. 531 | Customize output as needed. 532 | */ 533 | SELECT c.* 534 | FROM tmpCustomer AS t 535 | INNER JOIN customer AS c ON t.customer_id = c.customer_id; 536 | 537 | /* Clean up */ 538 | DROP TABLE tmpCustomer; 539 | END // 540 | 541 | DELIMITER ; 542 | 543 | DELIMITER $$ 544 | 545 | CREATE FUNCTION get_customer_balance(p_customer_id INT, p_effective_date DATETIME) RETURNS DECIMAL(5,2) 546 | DETERMINISTIC 547 | READS SQL DATA 548 | BEGIN 549 | 550 | #OK, WE NEED TO CALCULATE THE CURRENT BALANCE GIVEN A CUSTOMER_ID AND A DATE 551 | #THAT WE WANT THE BALANCE TO BE EFFECTIVE FOR. 
THE BALANCE IS: 552 | # 1) RENTAL FEES FOR ALL PREVIOUS RENTALS 553 | # 2) ONE DOLLAR FOR EVERY DAY THE PREVIOUS RENTALS ARE OVERDUE 554 | # 3) IF A FILM IS MORE THAN RENTAL_DURATION * 2 OVERDUE, CHARGE THE REPLACEMENT_COST 555 | # 4) SUBTRACT ALL PAYMENTS MADE BEFORE THE DATE SPECIFIED 556 | 557 | DECLARE v_rentfees DECIMAL(5,2); #FEES PAID TO RENT THE VIDEOS INITIALLY 558 | DECLARE v_overfees INTEGER; #LATE FEES FOR PRIOR RENTALS 559 | DECLARE v_payments DECIMAL(5,2); #SUM OF PAYMENTS MADE PREVIOUSLY 560 | 561 | SELECT IFNULL(SUM(film.rental_rate),0) INTO v_rentfees 562 | FROM film, inventory, rental 563 | WHERE film.film_id = inventory.film_id 564 | AND inventory.inventory_id = rental.inventory_id 565 | AND rental.rental_date <= p_effective_date 566 | AND rental.customer_id = p_customer_id; 567 | 568 | SELECT IFNULL(SUM(IF((TO_DAYS(rental.return_date) - TO_DAYS(rental.rental_date)) > film.rental_duration, 569 | ((TO_DAYS(rental.return_date) - TO_DAYS(rental.rental_date)) - film.rental_duration),0)),0) INTO v_overfees 570 | FROM rental, inventory, film 571 | WHERE film.film_id = inventory.film_id 572 | AND inventory.inventory_id = rental.inventory_id 573 | AND rental.rental_date <= p_effective_date 574 | AND rental.customer_id = p_customer_id; 575 | 576 | 577 | SELECT IFNULL(SUM(payment.amount),0) INTO v_payments 578 | FROM payment 579 | 580 | WHERE payment.payment_date <= p_effective_date 581 | AND payment.customer_id = p_customer_id; 582 | 583 | RETURN v_rentfees + v_overfees - v_payments; 584 | END $$ 585 | 586 | DELIMITER ; 587 | 588 | DELIMITER $$ 589 | 590 | CREATE PROCEDURE film_in_stock(IN p_film_id INT, IN p_store_id INT, OUT p_film_count INT) 591 | READS SQL DATA 592 | BEGIN 593 | SELECT inventory_id 594 | FROM inventory 595 | WHERE film_id = p_film_id 596 | AND store_id = p_store_id 597 | AND inventory_in_stock(inventory_id); 598 | 599 | SELECT COUNT(*) 600 | FROM inventory 601 | WHERE film_id = p_film_id 602 | AND store_id = p_store_id 603 | AND inventory_in_stock(inventory_id) 604 | INTO p_film_count; 605 | END $$ 606 | 607 | DELIMITER ; 608 | 609 | DELIMITER $$ 610 | 611 | CREATE PROCEDURE film_not_in_stock(IN p_film_id INT, IN p_store_id INT, OUT p_film_count INT) 612 | READS SQL DATA 613 | BEGIN 614 | SELECT inventory_id 615 | FROM inventory 616 | WHERE film_id = p_film_id 617 | AND store_id = p_store_id 618 | AND NOT inventory_in_stock(inventory_id); 619 | 620 | SELECT COUNT(*) 621 | FROM inventory 622 | WHERE film_id = p_film_id 623 | AND store_id = p_store_id 624 | AND NOT inventory_in_stock(inventory_id) 625 | INTO p_film_count; 626 | END $$ 627 | 628 | DELIMITER ; 629 | 630 | DELIMITER $$ 631 | 632 | CREATE FUNCTION inventory_held_by_customer(p_inventory_id INT) RETURNS INT 633 | READS SQL DATA 634 | BEGIN 635 | DECLARE v_customer_id INT; 636 | DECLARE EXIT HANDLER FOR NOT FOUND RETURN NULL; 637 | 638 | SELECT customer_id INTO v_customer_id 639 | FROM rental 640 | WHERE return_date IS NULL 641 | AND inventory_id = p_inventory_id; 642 | 643 | RETURN v_customer_id; 644 | END $$ 645 | 646 | DELIMITER ; 647 | 648 | DELIMITER $$ 649 | 650 | CREATE FUNCTION inventory_in_stock(p_inventory_id INT) RETURNS BOOLEAN 651 | READS SQL DATA 652 | BEGIN 653 | DECLARE v_rentals INT; 654 | DECLARE v_out INT; 655 | 656 | #AN ITEM IS IN-STOCK IF THERE ARE EITHER NO ROWS IN THE rental TABLE 657 | #FOR THE ITEM OR ALL ROWS HAVE return_date POPULATED 658 | 659 | SELECT COUNT(*) INTO v_rentals 660 | FROM rental 661 | WHERE inventory_id = p_inventory_id; 662 | 663 | IF v_rentals = 0 THEN 664 
| RETURN TRUE; 665 | END IF; 666 | 667 | SELECT COUNT(rental_id) INTO v_out 668 | FROM inventory LEFT JOIN rental USING(inventory_id) 669 | WHERE inventory.inventory_id = p_inventory_id 670 | AND rental.return_date IS NULL; 671 | 672 | IF v_out > 0 THEN 673 | RETURN FALSE; 674 | ELSE 675 | RETURN TRUE; 676 | END IF; 677 | END $$ 678 | 679 | DELIMITER ; 680 | 681 | SET SQL_MODE=@OLD_SQL_MODE; 682 | SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS; 683 | SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS; 684 | 685 | 686 | -------------------------------------------------------------------------------- /hack/samples/postgres/dvdrental.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | HERE="$(dirname "${BASH_SOURCE[0]}")" 3 | POSTGRES_USER="${POSTGRES_USER:-postgres}" 4 | 5 | createdb --username=${POSTGRES_USER} --no-password dvdrental 6 | pg_restore --username=${POSTGRES_USER} --no-password -d dvdrental $HERE/dvdrental.tar 7 | -------------------------------------------------------------------------------- /hack/samples/postgres/dvdrental.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dungdm93/sqlalchemy-trino/b96e43c7745bb56e73e779e7b84c4456ac7047d9/hack/samples/postgres/dvdrental.tar -------------------------------------------------------------------------------- /hack/scripts/hive/hiveserver2-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euo pipefail 3 | HERE=$(dirname "${BASH_SOURCE[0]}") 4 | 5 | METASTORE_HOST="hive-metastore" 6 | METASTORE_PORT="9083" 7 | 8 | "${HERE}/wait-for" "$METASTORE_HOST" "$METASTORE_PORT" 9 | 10 | # exec "$@" 11 | exec hive --service hiveserver2 --hiveconf hive.root.logger=INFO,console 12 | -------------------------------------------------------------------------------- /hack/scripts/hive/metastore-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euo pipefail 3 | HERE=$(dirname "${BASH_SOURCE[0]}") 4 | 5 | DB_TYPE="postgres" 6 | DB_HOST="postgres" 7 | DB_PORT="5432" 8 | 9 | if [ -n "$DB_HOST" ]; then 10 | "${HERE}/wait-for" "$DB_HOST" "$DB_PORT" 11 | fi 12 | 13 | schematool -dbType "${DB_TYPE}" -upgradeSchema -verbose || 14 | schematool -dbType "${DB_TYPE}" -initSchema -verbose 15 | 16 | # exec "$@" 17 | exec hive --service metastore --verbose --hiveconf hive.root.logger=INFO,console 18 | -------------------------------------------------------------------------------- /hack/scripts/hive/wait-for: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | : ${NRETRY:=30} 3 | host=$1 4 | port=$2 5 | 6 | if ! command -v nc >/dev/null 2>&1; then 7 | echo >&2 "[$(date -Iseconds)] nc command not found" 8 | exit 1 9 | fi 10 | 11 | i=0 12 | while ! nc -z "$host" "$port" >/dev/null 2>&1; do 13 | ((i+=1)) 14 | 15 | if [ $i -ge $NRETRY ]; then 16 | echo >&2 "[$(date -Iseconds)] $host:$port still not reachable, giving up" 17 | exit 1 18 | fi 19 | 20 | echo "[$(date -Iseconds)] waiting for $host:$port... 
$i/$NRETRY" 21 | sleep 3 22 | done 23 | -------------------------------------------------------------------------------- /hack/scripts/superset/docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eo pipefail 3 | 4 | case "$1" in 5 | webserver) 6 | superset-tools wait database 7 | superset db upgrade 8 | # https://docs.gunicorn.org/en/stable/settings.html 9 | if [ -n "$GUNICORN_CONFIG_PATH" ]; then 10 | GUNICORN_CMD_ARGS="--config=$GUNICORN_CONFIG_PATH ${GUNICORN_CMD_ARGS}" 11 | fi 12 | exec gunicorn --bind=0.0.0.0:8088 \ 13 | ${GUNICORN_CMD_ARGS} \ 14 | "superset.app:create_app()" 15 | ;; 16 | worker|beat|flower) 17 | superset-tools wait broker 18 | # https://superset.apache.org/docs/installation/async-queries-celery 19 | # https://docs.celeryproject.org/en/stable/userguide/configuration.html 20 | # https://docs.celeryproject.org/en/stable/reference/cli.html#celery-worker 21 | if [ -n "$CELERY_CONFIG_PATH" ]; then 22 | CELERY_CMD_ARGS="--config=$CELERY_CONFIG_PATH ${CELERY_CMD_ARGS}" 23 | fi 24 | exec celery "$1" ${CELERY_CMD_ARGS} \ 25 | "--app=superset.tasks.celery_app:app" 26 | ;; 27 | version) 28 | superset version 29 | ;; 30 | *) 31 | exec "$@" 32 | ;; 33 | esac 34 | -------------------------------------------------------------------------------- /hack/scripts/superset/superset-tools: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import argparse 3 | import sys 4 | import time 5 | from datetime import datetime 6 | 7 | from superset import config 8 | 9 | 10 | def wait_for_database(retries: int, interval: float): 11 | from sqlalchemy import create_engine, sql 12 | from sqlalchemy.engine.base import Engine 13 | from sqlalchemy.exc import OperationalError 14 | 15 | engine: Engine = create_engine(config.SQLALCHEMY_DATABASE_URI) 16 | uri = repr(engine.url) 17 | for i in range(retries): 18 | try: 19 | engine.execute(sql.text('SELECT 1')).scalar() 20 | except OperationalError: 21 | print(f'[{datetime.now()}] waiting for database at {uri}... Try {i + 1}/{retries}', file=sys.stderr) 22 | time.sleep(interval) 23 | else: 24 | print(f'[{datetime.now()}] Connect to database at {uri} successfully') 25 | return 26 | 27 | sys.exit('SQLAlchemy database still unreachable, giving up') 28 | 29 | 30 | def wait_for_broker(retries: int, interval: float): 31 | from kombu.connection import Connection 32 | from kombu.exceptions import OperationalError 33 | 34 | class errback: 35 | def __init__(self, uri: str, max_retries: int): 36 | self.uri = uri 37 | self.count = 0 38 | self.max_retries = max_retries 39 | 40 | def __call__(self, exc, interval): 41 | self.count += 1 42 | msg = f'[{datetime.now()}] waiting for broker at {self.uri}... 
Try {self.count}/{self.max_retries}' 43 | print(msg, file=sys.stderr) 44 | 45 | try: 46 | conn = Connection(config.CELERY_CONFIG.BROKER_URL) 47 | uri = conn.as_uri() 48 | conn.ensure_connection(errback=errback(uri, max_retries=retries), 49 | max_retries=retries, interval_start=interval, interval_max=interval) 50 | print(f'[{datetime.now()}] Connected to broker at {uri} successfully') 51 | except OperationalError: 52 | sys.exit('Celery broker still unreachable, giving up') 53 | 54 | 55 | def build_argparse(): 56 | def wait_service(args: argparse.Namespace): 57 | if args.service == 'database': 58 | wait_for_database(args.retries, args.interval) 59 | elif args.service == 'broker': 60 | wait_for_broker(args.retries, args.interval) 61 | else: 62 | raise ValueError(f'Unknown service {args.service}') 63 | 64 | parser = argparse.ArgumentParser() 65 | subparsers = parser.add_subparsers(title='sub-commands') 66 | 67 | # wait sub-command 68 | wait = subparsers.add_parser('wait', help='Wait for an external service to start up') 69 | wait.add_argument('service', type=str, choices=['database', 'broker'], 70 | help='Service to wait for') 71 | wait.add_argument('--retries', type=int, default=30, help='Maximum number of times to retry') 72 | wait.add_argument('--interval', type=float, default=3, help='Number of seconds to sleep between retries') 73 | wait.set_defaults(func=wait_service) 74 | 75 | return parser 76 | 77 | 78 | def main(): 79 | parser = build_argparse() 80 | args = parser.parse_args() 81 | args.func(args) 82 | 83 | 84 | if __name__ == '__main__': 85 | main() 86 | -------------------------------------------------------------------------------- /hack/scripts/superset/trino.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from typing import Optional 3 | 4 | from superset.db_engine_specs import BaseEngineSpec 5 | from superset.utils import core as utils 6 | 7 | 8 | class TrinoEngineSpec(BaseEngineSpec): 9 | engine = "trino" 10 | engine_name = "Trino" 11 | 12 | # pylint: disable=line-too-long 13 | _time_grain_expressions = { 14 | None: "{col}", 15 | "PT1S": "date_trunc('second', CAST({col} AS TIMESTAMP))", 16 | "PT1M": "date_trunc('minute', CAST({col} AS TIMESTAMP))", 17 | "PT1H": "date_trunc('hour', CAST({col} AS TIMESTAMP))", 18 | "P1D": "date_trunc('day', CAST({col} AS TIMESTAMP))", 19 | "P1W": "date_trunc('week', CAST({col} AS TIMESTAMP))", 20 | "P1M": "date_trunc('month', CAST({col} AS TIMESTAMP))", 21 | "P0.25Y": "date_trunc('quarter', CAST({col} AS TIMESTAMP))", 22 | "P1Y": "date_trunc('year', CAST({col} AS TIMESTAMP))", 23 | # "1969-12-28T00:00:00Z/P1W", # Week starting Sunday 24 | # "1969-12-29T00:00:00Z/P1W", # Week starting Monday 25 | # "P1W/1970-01-03T00:00:00Z", # Week ending Saturday 26 | # "P1W/1970-01-04T00:00:00Z", # Week ending Sunday 27 | } 28 | 29 | @classmethod 30 | def convert_dttm(cls, target_type: str, dttm: datetime) -> Optional[str]: 31 | tt = target_type.upper() 32 | if tt == utils.TemporalType.DATE: 33 | value = dttm.date().isoformat() 34 | return f"from_iso8601_date('{value}')" 35 | if tt == utils.TemporalType.TIMESTAMP: 36 | value = dttm.isoformat(timespec='microseconds') 37 | return f"from_iso8601_timestamp('{value}')" 38 | return None 39 | 40 | @classmethod 41 | def epoch_to_dttm(cls) -> str: 42 | return "from_unixtime({col})" 43 | -------------------------------------------------------------------------------- /requirements.txt: 
-------------------------------------------------------------------------------- 1 | SQLAlchemy~=1.3 2 | trino~=0.306 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r", encoding="utf-8") as fh: 4 | long_description = fh.read() 5 | 6 | setuptools.setup( 7 | name="sqlalchemy-trino", 8 | version="0.5.0", 9 | author="Dũng Đặng Minh", 10 | author_email="dungdm93@live.com", 11 | description="Trino dialect for SQLAlchemy", 12 | long_description=long_description, 13 | long_description_content_type="text/markdown", 14 | url="https://github.com/dungdm93/sqlalchemy-trino", 15 | keywords=["sqlalchemy", "trino"], 16 | license="Apache 2.0", 17 | platforms=["any"], 18 | classifiers=[ 19 | "Intended Audience :: Developers", 20 | "License :: OSI Approved :: Apache Software License", 21 | "Operating System :: OS Independent", 22 | "Programming Language :: Python", 23 | "Programming Language :: Python :: 3", 24 | "Programming Language :: Python :: 3.7", 25 | "Programming Language :: Python :: 3.8", 26 | "Programming Language :: Python :: 3.9", 27 | "Programming Language :: Python :: Implementation :: CPython", 28 | "Programming Language :: Python :: Implementation :: PyPy", 29 | "Topic :: Database", 30 | "Topic :: Database :: Front-Ends", 31 | ], 32 | python_requires='>=3.7', 33 | install_requires=[ 34 | "trino[sqlalchemy]>=0.310", 35 | ], 36 | ) 37 | -------------------------------------------------------------------------------- /sqlalchemy_trino/__init__.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy.dialects import registry 2 | 3 | __version__ = '0.5.0' 4 | registry.register("trino", "sqlalchemy_trino.dialect", "TrinoDialect") 5 | -------------------------------------------------------------------------------- /sqlalchemy_trino/compiler.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy.sql import compiler 2 | 3 | # https://trino.io/docs/current/language/reserved.html 4 | RESERVED_WORDS = { 5 | "alter", 6 | "and", 7 | "as", 8 | "between", 9 | "by", 10 | "case", 11 | "cast", 12 | "constraint", 13 | "create", 14 | "cross", 15 | "cube", 16 | "current_catalog", 17 | "current_date", 18 | "current_path", 19 | "current_role", 20 | "current_schema", 21 | "current_time", 22 | "current_timestamp", 23 | "current_user", 24 | "deallocate", 25 | "delete", 26 | "describe", 27 | "distinct", 28 | "drop", 29 | "else", 30 | "end", 31 | "escape", 32 | "except", 33 | "execute", 34 | "exists", 35 | "extract", 36 | "false", 37 | "for", 38 | "from", 39 | "full", 40 | "group", 41 | "grouping", 42 | "having", 43 | "in", 44 | "inner", 45 | "insert", 46 | "intersect", 47 | "into", 48 | "is", 49 | "join", 50 | "left", 51 | "like", 52 | "localtime", 53 | "localtimestamp", 54 | "natural", 55 | "normalize", 56 | "not", 57 | "null", 58 | "on", 59 | "or", 60 | "order", 61 | "outer", 62 | "prepare", 63 | "recursive", 64 | "right", 65 | "rollup", 66 | "select", 67 | "skip", 68 | "table", 69 | "then", 70 | "true", 71 | "uescape", 72 | "union", 73 | "unnest", 74 | "using", 75 | "values", 76 | "when", 77 | "where", 78 | "with", 79 | } 80 | 81 | 82 | class TrinoSQLCompiler(compiler.SQLCompiler): 83 | 84 | def limit_clause(self, select, **kw): 85 | """ 86 | Trino supports only the OFFSET...LIMIT syntax, not LIMIT...OFFSET. 
87 | See https://github.com/trinodb/trino/issues/4335. 88 | """ 89 | text = "" 90 | if select._offset_clause is not None: 91 | text += " OFFSET " + self.process(select._offset_clause, **kw) 92 | if select._limit_clause is not None: 93 | text += "\n LIMIT " + self.process(select._limit_clause, **kw) 94 | return text 95 | 96 | 97 | class TrinoDDLCompiler(compiler.DDLCompiler): 98 | pass 99 | 100 | 101 | class TrinoTypeCompiler(compiler.GenericTypeCompiler): 102 | def visit_FLOAT(self, type_, **kw): 103 | precision = type_.precision or 32 104 | if 0 <= precision <= 32: 105 | return self.visit_REAL(type_, **kw) 106 | elif 32 < precision <= 64: 107 | return self.visit_DOUBLE(type_, **kw) 108 | else: 109 | raise ValueError(f"type.precision must be in range [0, 64], got {type_.precision}") 110 | 111 | def visit_DOUBLE(self, type_, **kw): 112 | return "DOUBLE" 113 | 114 | def visit_NUMERIC(self, type_, **kw): 115 | return self.visit_DECIMAL(type_, **kw) 116 | 117 | def visit_NCHAR(self, type_, **kw): 118 | return self.visit_CHAR(type_, **kw) 119 | 120 | def visit_NVARCHAR(self, type_, **kw): 121 | return self.visit_VARCHAR(type_, **kw) 122 | 123 | def visit_TEXT(self, type_, **kw): 124 | return self.visit_VARCHAR(type_, **kw) 125 | 126 | def visit_BINARY(self, type_, **kw): 127 | return self.visit_VARBINARY(type_, **kw) 128 | 129 | def visit_CLOB(self, type_, **kw): 130 | return self.visit_VARCHAR(type_, **kw) 131 | 132 | def visit_NCLOB(self, type_, **kw): 133 | return self.visit_VARCHAR(type_, **kw) 134 | 135 | def visit_BLOB(self, type_, **kw): 136 | return self.visit_VARBINARY(type_, **kw) 137 | 138 | def visit_DATETIME(self, type_, **kw): 139 | return self.visit_TIMESTAMP(type_, **kw) 140 | 141 | 142 | class TrinoIdentifierPreparer(compiler.IdentifierPreparer): 143 | reserved_words = RESERVED_WORDS 144 | -------------------------------------------------------------------------------- /sqlalchemy_trino/datatype.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import * 3 | 4 | from sqlalchemy import util 5 | from sqlalchemy.sql import sqltypes 6 | from sqlalchemy.sql.type_api import TypeEngine 7 | 8 | SQLType = Union[TypeEngine, Type[TypeEngine]] 9 | 10 | 11 | class DOUBLE(sqltypes.Float): 12 | __visit_name__ = "DOUBLE" 13 | 14 | 15 | class MAP(TypeEngine): 16 | __visit_name__ = "MAP" 17 | 18 | def __init__(self, key_type: SQLType, value_type: SQLType): 19 | if isinstance(key_type, type): 20 | key_type = key_type() 21 | self.key_type: TypeEngine = key_type 22 | 23 | if isinstance(value_type, type): 24 | value_type = value_type() 25 | self.value_type: TypeEngine = value_type 26 | 27 | @property 28 | def python_type(self): 29 | return dict 30 | 31 | 32 | class ROW(TypeEngine): 33 | __visit_name__ = "ROW" 34 | 35 | def __init__(self, attr_types: List[Tuple[Optional[str], SQLType]]): 36 | self.attr_types: List[Tuple[Optional[str], SQLType]] = [] 37 | for attr_name, attr_type in attr_types: 38 | if isinstance(attr_type, type): 39 | attr_type = attr_type() 40 | self.attr_types.append((attr_name, attr_type)) 41 | 42 | @property 43 | def python_type(self): 44 | return list 45 | 46 | 47 | # https://trino.io/docs/current/language/types.html 48 | _type_map = { 49 | # === Boolean === 50 | 'boolean': sqltypes.BOOLEAN, 51 | 52 | # === Integer === 53 | 'tinyint': sqltypes.SMALLINT, 54 | 'smallint': sqltypes.SMALLINT, 55 | 'int': sqltypes.INTEGER, 56 | 'integer': sqltypes.INTEGER, 57 | 'bigint': sqltypes.BIGINT, 58 | 59 | # === Floating-point 
=== 60 | 'real': sqltypes.REAL, 61 | 'double': DOUBLE, 62 | 63 | # === Fixed-precision === 64 | 'decimal': sqltypes.DECIMAL, 65 | 66 | # === String === 67 | 'varchar': sqltypes.VARCHAR, 68 | 'char': sqltypes.CHAR, 69 | 'varbinary': sqltypes.VARBINARY, 70 | 'json': sqltypes.JSON, 71 | 72 | # === Date and time === 73 | 'date': sqltypes.DATE, 74 | 'time': sqltypes.TIME, 75 | 'timestamp': sqltypes.TIMESTAMP, 76 | 77 | # 'interval year to month': 78 | # 'interval day to second': 79 | # 80 | # === Structural === 81 | # 'array': ARRAY, 82 | # 'map': MAP 83 | # 'row': ROW 84 | # 85 | # === Mixed === 86 | # 'ipaddress': IPADDRESS 87 | # 'uuid': UUID, 88 | # 'hyperloglog': HYPERLOGLOG, 89 | # 'p4hyperloglog': P4HYPERLOGLOG, 90 | # 'qdigest': QDIGEST, 91 | # 'tdigest': TDIGEST, 92 | } 93 | 94 | 95 | def unquote(string: str, quote: str = '"', escape: str = '\\') -> str: 96 | """ 97 | If string starts and ends with a quote, unquote it 98 | """ 99 | if string.startswith(quote) and string.endswith(quote): 100 | string = string[1:-1] 101 | string = string.replace(f"{escape}{quote}", quote) \ 102 | .replace(f"{escape}{escape}", escape) 103 | return string 104 | 105 | 106 | def aware_split(string: str, delimiter: str = ',', maxsplit: int = -1, 107 | quote: str = '"', escaped_quote: str = r'\"', 108 | open_bracket: str = '(', close_bracket: str = ')') -> Iterator[str]: 109 | """ 110 | A split function that is aware of quotes and brackets/parentheses. 111 | 112 | :param string: string to split 113 | :param delimiter: string defining where to split, usually a comma or space 114 | :param maxsplit: Maximum number of splits to do. -1 (default) means no limit. 115 | :param quote: string, either a single or a double quote 116 | :param escaped_quote: string representing an escaped quote 117 | :param open_bracket: string, either [, {, < or ( 118 | :param close_bracket: string, either ], }, > or ) 119 | """ 120 | parens = 0 121 | quotes = False 122 | i = 0 123 | if maxsplit < -1: 124 | raise ValueError(f"maxsplit must be >= -1, got {maxsplit}") 125 | elif maxsplit == 0: 126 | yield string 127 | return 128 | for j, character in enumerate(string): 129 | complete = parens == 0 and not quotes 130 | if complete and character == delimiter: 131 | if maxsplit != -1: 132 | maxsplit -= 1 133 | yield string[i:j] 134 | i = j + len(delimiter) 135 | if maxsplit == 0: 136 | break 137 | elif character == open_bracket: 138 | parens += 1 139 | elif character == close_bracket: 140 | parens -= 1 141 | elif character == quote: 142 | if quotes and string[j - len(escaped_quote) + 1: j + 1] != escaped_quote: 143 | quotes = False 144 | elif not quotes: 145 | quotes = True 146 | yield string[i:] 147 | 148 | 149 | def parse_sqltype(type_str: str) -> TypeEngine: 150 | type_str = type_str.strip().lower() 151 | match = re.match(r'^(?P<type>\w+)\s*(?:\((?P<options>.*)\))?', type_str) 152 | if not match: 153 | util.warn(f"Could not parse type name '{type_str}'") 154 | return sqltypes.NULLTYPE 155 | type_name = match.group("type") 156 | type_opts = match.group("options") 157 | 158 | if type_name == "array": 159 | item_type = parse_sqltype(type_opts) 160 | if isinstance(item_type, sqltypes.ARRAY): 161 | dimensions = (item_type.dimensions or 1) + 1 162 | return sqltypes.ARRAY(item_type.item_type, dimensions=dimensions) 163 | return sqltypes.ARRAY(item_type) 164 | elif type_name == "map": 165 | key_type_str, value_type_str = aware_split(type_opts) 166 | key_type = parse_sqltype(key_type_str) 167 | value_type = parse_sqltype(value_type_str) 168 | return MAP(key_type, 
value_type) 169 | elif type_name == "row": 170 | attr_types: List[Tuple[Optional[str], SQLType]] = [] 171 | for attr in aware_split(type_opts): 172 | attr_name, attr_type_str = aware_split(attr.strip(), delimiter=' ', maxsplit=1) 173 | attr_name = unquote(attr_name) 174 | attr_type = parse_sqltype(attr_type_str) 175 | attr_types.append((attr_name, attr_type)) 176 | return ROW(attr_types) 177 | 178 | if type_name not in _type_map: 179 | util.warn(f"Did not recognize type '{type_name}'") 180 | return sqltypes.NULLTYPE 181 | type_class = _type_map[type_name] 182 | type_args = [int(o.strip()) for o in type_opts.split(',')] if type_opts else [] 183 | if type_name in ('time', 'timestamp'): 184 | type_kwargs = dict(timezone=type_str.endswith("with time zone")) 185 | return type_class(**type_kwargs) # TODO: handle time/timestamp(p) precision 186 | return type_class(*type_args) 187 | -------------------------------------------------------------------------------- /sqlalchemy_trino/dbapi.py: -------------------------------------------------------------------------------- 1 | """ 2 | The ``trino.dbapi`` module doesn't expose the DB-API exceptions, so they are re-exported here from ``trino.exceptions``. 3 | """ 4 | 5 | # https://www.python.org/dev/peps/pep-0249/#globals 6 | from trino.dbapi import ( # noqa 7 | apilevel, 8 | threadsafety, 9 | paramstyle, 10 | connect, 11 | Connection, 12 | Cursor 13 | ) 14 | 15 | # https://www.python.org/dev/peps/pep-0249/#exceptions 16 | from trino.exceptions import ( # noqa 17 | Warning, 18 | Error, 19 | InterfaceError, 20 | DatabaseError, 21 | DataError, 22 | OperationalError, 23 | IntegrityError, 24 | InternalError, 25 | ProgrammingError, 26 | NotSupportedError, 27 | ) 28 | 29 | from trino.transaction import ( # noqa 30 | Transaction, 31 | IsolationLevel 32 | ) 33 | -------------------------------------------------------------------------------- /sqlalchemy_trino/dialect.py: -------------------------------------------------------------------------------- 1 | import re 2 | from textwrap import dedent 3 | from typing import * 4 | 5 | from sqlalchemy import exc, sql 6 | from sqlalchemy.engine.base import Connection 7 | from sqlalchemy.engine.default import DefaultDialect, DefaultExecutionContext 8 | from sqlalchemy.engine.url import URL 9 | from trino.auth import BasicAuthentication, JWTAuthentication 10 | from trino.constants import HTTPS 11 | from trino.dbapi import Cursor 12 | 13 | from . import compiler 14 | from . import datatype 15 | from . import dbapi as trino_dbapi 16 | from . 
import error 17 | 18 | 19 | class TrinoDialect(DefaultDialect): 20 | name = 'trino' 21 | driver = 'rest' 22 | 23 | statement_compiler = compiler.TrinoSQLCompiler 24 | ddl_compiler = compiler.TrinoDDLCompiler 25 | type_compiler = compiler.TrinoTypeCompiler 26 | preparer = compiler.TrinoIdentifierPreparer 27 | 28 | # Data Type 29 | supports_native_enum = False 30 | supports_native_boolean = True 31 | supports_native_decimal = True 32 | 33 | # Column options 34 | supports_sequences = False 35 | supports_comments = True 36 | inline_comments = True 37 | supports_default_values = False 38 | 39 | # DDL 40 | supports_alter = True 41 | 42 | # DML 43 | supports_empty_insert = False 44 | supports_multivalues_insert = True 45 | postfetch_lastrowid = False 46 | 47 | # Version parser 48 | __version_pattern = re.compile(r'(\d+).*') 49 | 50 | @classmethod 51 | def dbapi(cls): 52 | """ 53 | ref: https://www.python.org/dev/peps/pep-0249/#module-interface 54 | """ 55 | return trino_dbapi 56 | 57 | def create_connect_args(self, url: URL) -> Tuple[List[Any], Dict[str, Any]]: 58 | args, kwargs = super(TrinoDialect, self).create_connect_args(url) # type: List[Any], Dict[str, Any] 59 | 60 | db_parts = kwargs.pop('database', 'system').split('/') 61 | if len(db_parts) == 1: 62 | kwargs['catalog'] = db_parts[0] 63 | elif len(db_parts) == 2: 64 | kwargs['catalog'] = db_parts[0] 65 | kwargs['schema'] = db_parts[1] 66 | else: 67 | raise ValueError(f'Unexpected database format {url.database}') 68 | 69 | username = kwargs.pop('username', 'anonymous') 70 | session_user = kwargs.pop('sessionUser', username) 71 | kwargs['user'] = session_user 72 | 73 | password = kwargs.pop('password', None) 74 | jwt_token = kwargs.pop('accessToken', None) 75 | if password: 76 | kwargs['auth'] = BasicAuthentication(username, password) 77 | elif jwt_token: 78 | kwargs['auth'] = JWTAuthentication(jwt_token) 79 | 80 | if 'auth' in kwargs: 81 | kwargs['http_scheme'] = HTTPS 82 | 83 | return args, kwargs 84 | 85 | def get_columns(self, connection: Connection, 86 | table_name: str, schema: str = None, **kw) -> List[Dict[str, Any]]: 87 | if not self.has_table(connection, table_name, schema): 88 | raise exc.NoSuchTableError(f'schema={schema}, table={table_name}') 89 | return self._get_columns(connection, table_name, schema, **kw) 90 | 91 | def _get_columns(self, connection: Connection, 92 | table_name: str, schema: str = None, **kw) -> List[Dict[str, Any]]: 93 | schema = schema or self._get_default_schema_name(connection) 94 | query = dedent(''' 95 | SELECT 96 | "column_name", 97 | "data_type", 98 | "column_default", 99 | UPPER("is_nullable") AS "is_nullable" 100 | FROM "information_schema"."columns" 101 | WHERE "table_schema" = :schema 102 | AND "table_name" = :table 103 | ORDER BY "ordinal_position" ASC 104 | ''').strip() 105 | res = connection.execute(sql.text(query), schema=schema, table=table_name) 106 | columns = [] 107 | for record in res: 108 | column = dict( 109 | name=record.column_name, 110 | type=datatype.parse_sqltype(record.data_type), 111 | nullable=record.is_nullable == 'YES', 112 | default=record.column_default, 113 | ) 114 | columns.append(column) 115 | return columns 116 | 117 | def get_pk_constraint(self, connection: Connection, 118 | table_name: str, schema: str = None, **kw) -> Dict[str, Any]: 119 | """Trino has no support for primary keys. 
Returns a dummy""" 120 | return dict(name=None, constrained_columns=[]) 121 | 122 | def get_primary_keys(self, connection: Connection, 123 | table_name: str, schema: str = None, **kw) -> List[str]: 124 | pk = self.get_pk_constraint(connection, table_name, schema) 125 | return pk.get('constrained_columns') # type: List[str] 126 | 127 | def get_foreign_keys(self, connection: Connection, 128 | table_name: str, schema: str = None, **kw) -> List[Dict[str, Any]]: 129 | """Trino has no support for foreign keys. Returns an empty list.""" 130 | return [] 131 | 132 | def get_schema_names(self, connection: Connection, **kw) -> List[str]: 133 | query = dedent(''' 134 | SELECT "schema_name" 135 | FROM "information_schema"."schemata" 136 | ''').strip() 137 | res = connection.execute(sql.text(query)) 138 | return [row.schema_name for row in res] 139 | 140 | def get_table_names(self, connection: Connection, schema: str = None, **kw) -> List[str]: 141 | schema = schema or self._get_default_schema_name(connection) 142 | if schema is None: 143 | raise exc.NoSuchTableError('schema is required') 144 | query = dedent(''' 145 | SELECT "table_name" 146 | FROM "information_schema"."tables" 147 | WHERE "table_schema" = :schema 148 | ''').strip() 149 | res = connection.execute(sql.text(query), schema=schema) 150 | return [row.table_name for row in res] 151 | 152 | def get_temp_table_names(self, connection: Connection, schema: str = None, **kw) -> List[str]: 153 | """Trino has no support for temporary tables. Returns an empty list.""" 154 | return [] 155 | 156 | def get_view_names(self, connection: Connection, schema: str = None, **kw) -> List[str]: 157 | schema = schema or self._get_default_schema_name(connection) 158 | if schema is None: 159 | raise exc.NoSuchTableError('schema is required') 160 | query = dedent(''' 161 | SELECT "table_name" 162 | FROM "information_schema"."views" 163 | WHERE "table_schema" = :schema 164 | ''').strip() 165 | res = connection.execute(sql.text(query), schema=schema) 166 | return [row.table_name for row in res] 167 | 168 | def get_temp_view_names(self, connection: Connection, schema: str = None, **kw) -> List[str]: 169 | """Trino has no support for temporary views. Returns an empty list.""" 170 | return [] 171 | 172 | def get_view_definition(self, connection: Connection, view_name: str, schema: str = None, **kw) -> str: 173 | schema = schema or self._get_default_schema_name(connection) 174 | if schema is None: 175 | raise exc.NoSuchTableError('schema is required') 176 | query = dedent(''' 177 | SELECT "view_definition" 178 | FROM "information_schema"."views" 179 | WHERE "table_schema" = :schema 180 | AND "table_name" = :view 181 | ''').strip() 182 | res = connection.execute(sql.text(query), schema=schema, view=view_name) 183 | return res.scalar() 184 | 185 | def get_indexes(self, connection: Connection, 186 | table_name: str, schema: str = None, **kw) -> List[Dict[str, Any]]: 187 | if not self.has_table(connection, table_name, schema): 188 | raise exc.NoSuchTableError(f'schema={schema}, table={table_name}') 189 | 190 | partitioned_columns = self._get_columns(connection, f'{table_name}$partitions', schema, **kw) 191 | partition_index = dict( 192 | name='partition', 193 | column_names=[col['name'] for col in partitioned_columns], 194 | unique=False 195 | ) 196 | return [partition_index, ] 197 | 198 | def get_sequence_names(self, connection: Connection, schema: str = None, **kw) -> List[str]: 199 | """Trino has no support for sequences. 
Returns an empty list.""" 200 | return [] 201 | 202 | def get_unique_constraints(self, connection: Connection, 203 | table_name: str, schema: str = None, **kw) -> List[Dict[str, Any]]: 204 | """Trino has no support for unique constraints. Returns an empty list.""" 205 | return [] 206 | 207 | def get_check_constraints(self, connection: Connection, 208 | table_name: str, schema: str = None, **kw) -> List[Dict[str, Any]]: 209 | """Trino has no support for check constraints. Returns an empty list.""" 210 | return [] 211 | 212 | def get_table_comment(self, connection: Connection, 213 | table_name: str, schema: str = None, **kw) -> Dict[str, Any]: 214 | properties_table = self._get_full_table(f'{table_name}$properties', schema) 215 | query = f'SELECT "comment" FROM {properties_table}' 216 | try: 217 | res = connection.execute(sql.text(query)) 218 | return dict(text=res.scalar()) 219 | except error.TrinoQueryError as e: 220 | if e.error_name in ( 221 | error.NOT_FOUND, 222 | error.COLUMN_NOT_FOUND, 223 | error.TABLE_NOT_FOUND, 224 | error.NOT_SUPPORTED 225 | ): 226 | return dict(text=None) 227 | raise 228 | 229 | def has_schema(self, connection: Connection, schema: str) -> bool: 230 | query = dedent(''' 231 | SELECT "schema_name" 232 | FROM "information_schema"."schemata" 233 | WHERE "schema_name" = :schema 234 | ''').strip() 235 | res = connection.execute(sql.text(query), schema=schema) 236 | return res.first() is not None 237 | 238 | def has_table(self, connection: Connection, 239 | table_name: str, schema: str = None, **kw) -> bool: 240 | schema = schema or self._get_default_schema_name(connection) 241 | if schema is None: 242 | return False 243 | query = dedent(''' 244 | SELECT "table_name" 245 | FROM "information_schema"."tables" 246 | WHERE "table_schema" = :schema 247 | AND "table_name" = :table 248 | ''').strip() 249 | res = connection.execute(sql.text(query), schema=schema, table=table_name) 250 | return res.first() is not None 251 | 252 | def has_sequence(self, connection: Connection, 253 | sequence_name: str, schema: str = None, **kw) -> bool: 254 | """Trino has no support for sequence. Returns False indicate that given sequence does not exists.""" 255 | return False 256 | 257 | def _get_server_version_info(self, connection: Connection) -> Tuple[int, ...]: 258 | query = 'SELECT version()' 259 | res = connection.execute(sql.text(query)).scalar() 260 | match = self.__version_pattern.match(res) 261 | version = int(match.group(1)) if match else 0 262 | return tuple([version]) 263 | 264 | def _get_default_schema_name(self, connection: Connection) -> Optional[str]: 265 | dbapi_connection: trino_dbapi.Connection = connection.connection 266 | return dbapi_connection.schema 267 | 268 | def do_execute(self, cursor: Cursor, statement: str, parameters: Tuple[Any, ...], 269 | context: DefaultExecutionContext = None): 270 | cursor.execute(statement, parameters) 271 | if context and context.should_autocommit: 272 | # SQL statement only submitted to Trino server when cursor.fetch*() is called. 273 | # For DDL (CREATE/ALTER/DROP) and DML (INSERT/UPDATE/DELETE) statement, call cursor.description 274 | # to force submit statement immediately. 
275 | cursor.description # noqa 276 | 277 | def do_rollback(self, dbapi_connection: trino_dbapi.Connection): 278 | if dbapi_connection.transaction is not None: 279 | dbapi_connection.rollback() 280 | 281 | def set_isolation_level(self, dbapi_conn: trino_dbapi.Connection, level: str) -> None: 282 | dbapi_conn._isolation_level = getattr(trino_dbapi.IsolationLevel, level) 283 | 284 | def get_isolation_level(self, dbapi_conn: trino_dbapi.Connection) -> str: 285 | level_names = ['AUTOCOMMIT', 286 | 'READ_UNCOMMITTED', 287 | 'READ_COMMITTED', 288 | 'REPEATABLE_READ', 289 | 'SERIALIZABLE'] 290 | return level_names[dbapi_conn.isolation_level] 291 | 292 | def _get_full_table(self, table_name: str, schema: str = None, quote: bool = True) -> str: 293 | table_part = self.identifier_preparer.quote_identifier(table_name) if quote else table_name 294 | if schema: 295 | schema_part = self.identifier_preparer.quote_identifier(schema) if quote else schema 296 | return f'{schema_part}.{table_part}' 297 | 298 | return table_part 299 | -------------------------------------------------------------------------------- /sqlalchemy_trino/error.py: -------------------------------------------------------------------------------- 1 | from trino.exceptions import ( # noqa 2 | TrinoQueryError 3 | ) 4 | 5 | # ref: https://github.com/trinodb/trino/blob/master/core/trino-spi/src/main/java/io/trino/spi/StandardErrorCode.java 6 | NOT_FOUND = 'NOT_FOUND' 7 | COLUMN_NOT_FOUND = 'COLUMN_NOT_FOUND' 8 | TABLE_NOT_FOUND = 'TABLE_NOT_FOUND' 9 | SCHEMA_NOT_FOUND = 'SCHEMA_NOT_FOUND' 10 | CATALOG_NOT_FOUND = 'CATALOG_NOT_FOUND' 11 | NOT_SUPPORTED = 'NOT_SUPPORTED' 12 | 13 | MISSING_TABLE = 'MISSING_TABLE' 14 | MISSING_COLUMN_NAME = 'MISSING_COLUMN_NAME' 15 | MISSING_SCHEMA_NAME = 'MISSING_SCHEMA_NAME' 16 | MISSING_CATALOG_NAME = 'MISSING_CATALOG_NAME' 17 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dungdm93/sqlalchemy-trino/b96e43c7745bb56e73e779e7b84c4456ac7047d9/tests/__init__.py -------------------------------------------------------------------------------- /tests/assertions.py: -------------------------------------------------------------------------------- 1 | from assertpy import add_extension, assert_that 2 | from sqlalchemy.sql.sqltypes import ARRAY 3 | 4 | from sqlalchemy_trino.datatype import SQLType, MAP, ROW 5 | 6 | 7 | def assert_sqltype(this: SQLType, that: SQLType): 8 | if isinstance(this, type): 9 | this = this() 10 | if isinstance(that, type): 11 | that = that() 12 | assert_that(type(this)).is_same_as(type(that)) 13 | if isinstance(this, ARRAY): 14 | assert_sqltype(this.item_type, that.item_type) 15 | if this.dimensions is None or this.dimensions == 1: 16 | # ARRAY(dimensions=None) == ARRAY(dimensions=1) 17 | assert_that(that.dimensions).is_in(None, 1) 18 | else: 19 | assert_that(this.dimensions).is_equal_to(that.dimensions) 20 | elif isinstance(this, MAP): 21 | assert_sqltype(this.key_type, that.key_type) 22 | assert_sqltype(this.value_type, that.value_type) 23 | elif isinstance(this, ROW): 24 | assert_that(len(this.attr_types)).is_equal_to(len(that.attr_types)) 25 | for (this_attr, that_attr) in zip(this.attr_types, that.attr_types): 26 | assert_that(this_attr[0]).is_equal_to(that_attr[0]) 27 | assert_sqltype(this_attr[1], that_attr[1]) 28 | else: 29 | assert_that(str(this)).is_equal_to(str(that)) 30 | 31 | 32 | @add_extension 33 | def 
is_sqltype(self, that): 34 | this = self.val 35 | assert_sqltype(this, that) 36 | -------------------------------------------------------------------------------- /tests/data/population.csv: -------------------------------------------------------------------------------- 1 | zipcode,population,area 2 | 94107,26599,6.11 3 | 94105,5846,1.23 4 | 94129,3183,6.11 5 | 94121,41203,5.69 6 | 94118,38319,4.07 7 | 94123,23088,2.74 8 | 94133,26237,1.91 9 | 94109,55984,2.74 10 | 94111,3713,0.9 11 | 94104,406,0.18 12 | -------------------------------------------------------------------------------- /tests/test_compiler.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import Table, MetaData, Column, Integer, String, select 2 | 3 | from sqlalchemy_trino.dialect import TrinoDialect 4 | 5 | metadata = MetaData() 6 | table = Table( 7 | 'table', 8 | metadata, 9 | Column('id', Integer, primary_key=True), 10 | Column('name', String), 11 | ) 12 | 13 | 14 | def test_limit_offset(): 15 | statement = select(table).limit(10).offset(0) 16 | query = statement.compile(dialect=TrinoDialect()) 17 | assert str(query) == 'SELECT "table".id, "table".name \nFROM "table" OFFSET :param_1\n LIMIT :param_2' 18 | 19 | 20 | def test_limit(): 21 | statement = select(table).limit(10) 22 | query = statement.compile(dialect=TrinoDialect()) 23 | assert str(query) == 'SELECT "table".id, "table".name \nFROM "table"\n LIMIT :param_1' 24 | 25 | 26 | def test_offset(): 27 | statement = select(table).offset(0) 28 | query = statement.compile(dialect=TrinoDialect()) 29 | assert str(query) == 'SELECT "table".id, "table".name \nFROM "table" OFFSET :param_1' 30 | -------------------------------------------------------------------------------- /tests/test_datatype_parse.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from assertpy import assert_that 3 | from sqlalchemy.sql.sqltypes import * 4 | from sqlalchemy.sql.type_api import TypeEngine 5 | 6 | from sqlalchemy_trino import datatype 7 | from sqlalchemy_trino.datatype import MAP, ROW 8 | 9 | 10 | @pytest.mark.parametrize( 11 | 'type_str, sql_type', 12 | datatype._type_map.items(), 13 | ids=datatype._type_map.keys() 14 | ) 15 | def test_parse_simple_type(type_str: str, sql_type: TypeEngine): 16 | actual_type = datatype.parse_sqltype(type_str) 17 | if not isinstance(actual_type, type): 18 | actual_type = type(actual_type) 19 | assert_that(actual_type).is_equal_to(sql_type) 20 | 21 | 22 | parse_cases_testcases = { 23 | 'char(10)': CHAR(10), 24 | 'Char(10)': CHAR(10), 25 | 'VARCHAR(10)': VARCHAR(10), 26 | 'varCHAR(10)': VARCHAR(10), 27 | 'VARchar(10)': VARCHAR(10), 28 | } 29 | 30 | 31 | @pytest.mark.parametrize( 32 | 'type_str, sql_type', 33 | parse_cases_testcases.items(), 34 | ids=parse_cases_testcases.keys() 35 | ) 36 | def test_parse_cases(type_str: str, sql_type: TypeEngine): 37 | actual_type = datatype.parse_sqltype(type_str) 38 | assert_that(actual_type).is_sqltype(sql_type) 39 | 40 | 41 | parse_type_options_testcases = { 42 | 'CHAR(10)': CHAR(10), 43 | 'VARCHAR(10)': VARCHAR(10), 44 | 'DECIMAL(20)': DECIMAL(20), 45 | 'DECIMAL(20, 3)': DECIMAL(20, 3), 46 | # TODO 47 | # TIME(3) 48 | # TIMESTAMP(6) 49 | # TIMESTAMP(9) WITH TIME ZONE 50 | } 51 | 52 | 53 | @pytest.mark.parametrize( 54 | 'type_str, sql_type', 55 | parse_type_options_testcases.items(), 56 | ids=parse_type_options_testcases.keys() 57 | ) 58 | def test_parse_type_options(type_str: str, sql_type: TypeEngine): 59 | 
actual_type = datatype.parse_sqltype(type_str) 60 | assert_that(actual_type).is_sqltype(sql_type) 61 | 62 | 63 | parse_array_testcases = { 64 | 'array(integer)': ARRAY(INTEGER()), 65 | 'array(varchar(10))': ARRAY(VARCHAR(10)), 66 | 'array(decimal(20,3))': ARRAY(DECIMAL(20, 3)), 67 | 'array(array(varchar(10)))': ARRAY(VARCHAR(10), dimensions=2), 68 | 'array(map(char, integer))': ARRAY(MAP(CHAR(), INTEGER())) 69 | } 70 | 71 | 72 | @pytest.mark.parametrize( 73 | 'type_str, sql_type', 74 | parse_array_testcases.items(), 75 | ids=parse_array_testcases.keys() 76 | ) 77 | def test_parse_array(type_str: str, sql_type: ARRAY): 78 | actual_type = datatype.parse_sqltype(type_str) 79 | assert_that(actual_type).is_sqltype(sql_type) 80 | 81 | 82 | parse_map_testcases = { 83 | 'map(char, integer)': MAP(CHAR(), INTEGER()), 84 | 'map(varchar(10), varchar(10))': MAP(VARCHAR(10), VARCHAR(10)), 85 | 'map(varchar(10), decimal(20,3))': MAP(VARCHAR(10), DECIMAL(20, 3)), 86 | 'map(char, array(varchar(10)))': MAP(CHAR(), ARRAY(VARCHAR(10))), 87 | 'map(varchar(10), array(varchar(10)))': MAP(VARCHAR(10), ARRAY(VARCHAR(10))), 88 | 'map(varchar(10), array(array(varchar(10))))': MAP(VARCHAR(10), ARRAY(VARCHAR(10), dimensions=2)), 89 | } 90 | 91 | 92 | @pytest.mark.parametrize( 93 | 'type_str, sql_type', 94 | parse_map_testcases.items(), 95 | ids=parse_map_testcases.keys() 96 | ) 97 | def test_parse_map(type_str: str, sql_type: MAP): 98 | actual_type = datatype.parse_sqltype(type_str) 99 | assert_that(actual_type).is_sqltype(sql_type) 100 | 101 | 102 | parse_row_testcases = { 103 | 'row(a integer, b varchar)': 104 | ROW(attr_types=[ 105 | ("a", INTEGER()), 106 | ("b", VARCHAR()), 107 | ]), 108 | 'row(a varchar(20), b decimal(20,3))': 109 | ROW(attr_types=[ 110 | ("a", VARCHAR(20)), 111 | ("b", DECIMAL(20, 3)), 112 | ]), 113 | 'row(x array(varchar(10)), y array(array(varchar(10))), z decimal(20,3))': 114 | ROW(attr_types=[ 115 | ("x", ARRAY(VARCHAR(10))), 116 | ("y", ARRAY(VARCHAR(10), dimensions=2)), 117 | ("z", DECIMAL(20, 3)), 118 | ]), 119 | 'row(min timestamp(6) with time zone, max timestamp(6) with time zone)': 120 | ROW(attr_types=[ 121 | ("min", TIMESTAMP(timezone=True)), 122 | ("max", TIMESTAMP(timezone=True)), 123 | ]), 124 | 'row("first name" varchar, "last name" varchar)': 125 | ROW(attr_types=[ 126 | ("first name", VARCHAR()), 127 | ("last name", VARCHAR()), 128 | ]), 129 | 'row("foo,bar" varchar, "foo(bar)" varchar, "foo\\"bar" varchar)': 130 | ROW(attr_types=[ 131 | (r'foo,bar', VARCHAR()), 132 | (r'foo(bar)', VARCHAR()), 133 | (r'foo"bar', VARCHAR()), 134 | ]), 135 | } 136 | 137 | 138 | @pytest.mark.parametrize( 139 | 'type_str, sql_type', 140 | parse_row_testcases.items(), 141 | ids=parse_row_testcases.keys() 142 | ) 143 | def test_parse_row(type_str: str, sql_type: ROW): 144 | actual_type = datatype.parse_sqltype(type_str) 145 | assert_that(actual_type).is_sqltype(sql_type) 146 | 147 | 148 | parse_datetime_testcases = { 149 | 'date': DATE(), 150 | 'time': TIME(), 151 | 'time with time zone': TIME(timezone=True), 152 | 'timestamp': TIMESTAMP(), 153 | 'timestamp with time zone': TIMESTAMP(timezone=True), 154 | } 155 | 156 | 157 | @pytest.mark.parametrize( 158 | 'type_str, sql_type', 159 | parse_datetime_testcases.items(), 160 | ids=parse_datetime_testcases.keys() 161 | ) 162 | def test_parse_datetime(type_str: str, sql_type: TypeEngine): 163 | actual_type = datatype.parse_sqltype(type_str) 164 | assert_that(actual_type).is_sqltype(sql_type) 165 | 
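A minimal sketch of what the tests above exercise (illustrative only; the nested type string below is an arbitrary example, not a case from the suite):

import sqlalchemy_trino.datatype as datatype

t = datatype.parse_sqltype('map(varchar(10), array(row(x integer, y integer)))')
assert type(t) is datatype.MAP                  # outer container is a MAP
assert str(t.key_type) == 'VARCHAR(10)'         # the length option is preserved
row_type = t.value_type.item_type               # the ARRAY value wraps a ROW
assert row_type.attr_types[0][0] == 'x'         # ROW keeps its attribute names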
-------------------------------------------------------------------------------- /tests/test_datatype_split.py: -------------------------------------------------------------------------------- 1 | from typing import * 2 | 3 | import pytest 4 | from assertpy import assert_that 5 | 6 | from sqlalchemy_trino import datatype 7 | 8 | split_string_testcases = { 9 | '10': ['10'], 10 | '10,3': ['10', '3'], 11 | '"a,b",c': ['"a,b"', 'c'], 12 | '"a,b","c,d"': ['"a,b"', '"c,d"'], 13 | r'"a,\"b\",c",d': [r'"a,\"b\",c"', 'd'], 14 | r'"foo(bar,\"baz\")",quiz': [r'"foo(bar,\"baz\")"', 'quiz'], 15 | 'varchar': ['varchar'], 16 | 'varchar,int': ['varchar', 'int'], 17 | 'varchar,int,float': ['varchar', 'int', 'float'], 18 | 'array(varchar)': ['array(varchar)'], 19 | 'array(varchar),int': ['array(varchar)', 'int'], 20 | 'array(varchar(20))': ['array(varchar(20))'], 21 | 'array(varchar(20)),int': ['array(varchar(20))', 'int'], 22 | 'array(varchar(20)),array(varchar(20))': ['array(varchar(20))', 'array(varchar(20))'], 23 | 'map(varchar, integer),int': ['map(varchar, integer)', 'int'], 24 | 'map(varchar(20), integer),int': ['map(varchar(20), integer)', 'int'], 25 | 'map(varchar(20), varchar(20)),int': ['map(varchar(20), varchar(20))', 'int'], 26 | 'map(varchar(20), varchar(20)),array(varchar)': ['map(varchar(20), varchar(20))', 'array(varchar)'], 27 | 'row(first_name varchar(20), last_name varchar(20)),int': 28 | ['row(first_name varchar(20), last_name varchar(20))', 'int'], 29 | 'row("first name" varchar(20), "last name" varchar(20)),int': 30 | ['row("first name" varchar(20), "last name" varchar(20))', 'int'], 31 | } 32 | 33 | 34 | @pytest.mark.parametrize( 35 | 'input_string, output_strings', 36 | split_string_testcases.items(), 37 | ids=split_string_testcases.keys() 38 | ) 39 | def test_split_string(input_string: str, output_strings: List[str]): 40 | actual = list(datatype.aware_split(input_string)) 41 | assert_that(actual).is_equal_to(output_strings) 42 | 43 | 44 | split_delimiter_testcases = [ 45 | ('first,second', ',', ['first', 'second']), 46 | ('first second', ' ', ['first', 'second']), 47 | ('first|second', '|', ['first', 'second']), 48 | ('first,second third', ',', ['first', 'second third']), 49 | ('first,second third', ' ', ['first,second', 'third']), 50 | ] 51 | 52 | 53 | @pytest.mark.parametrize( 54 | 'input_string, delimiter, output_strings', 55 | split_delimiter_testcases, 56 | ) 57 | def test_split_delimiter(input_string: str, delimiter: str, output_strings: List[str]): 58 | actual = list(datatype.aware_split(input_string, delimiter=delimiter)) 59 | assert_that(actual).is_equal_to(output_strings) 60 | 61 | 62 | split_maxsplit_testcases = [ 63 | ('one,two,three', -1, ['one', 'two', 'three']), 64 | ('one,two,three', 0, ['one,two,three']), 65 | ('one,two,three', 1, ['one', 'two,three']), 66 | ('one,two,three', 2, ['one', 'two', 'three']), 67 | ('one,two,three', 3, ['one', 'two', 'three']), 68 | ('one,two,three', 10, ['one', 'two', 'three']), 69 | 70 | (',one,two,three', 0, [',one,two,three']), 71 | (',one,two,three', 1, ['', 'one,two,three']), 72 | 73 | ('one,two,three,', 2, ['one', 'two', 'three,']), 74 | ('one,two,three,', 3, ['one', 'two', 'three', '']), 75 | ] 76 | 77 | 78 | @pytest.mark.parametrize( 79 | 'input_string, maxsplit, output_strings', 80 | split_maxsplit_testcases, 81 | ) 82 | def test_split_maxsplit(input_string: str, maxsplit: int, output_strings: List[str]): 83 | actual = list(datatype.aware_split(input_string, maxsplit=maxsplit)) 84 | 
assert_that(actual).is_equal_to(output_strings) 85 | -------------------------------------------------------------------------------- /tests/test_dialect.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy.engine import url 2 | from sqlalchemy_trino.dialect import HTTPS, JWTAuthentication, TrinoDialect 3 | 4 | 5 | def test_trino_connection_string_user(): 6 | dialect = TrinoDialect() 7 | username = 'test-user' 8 | u = url.make_url(f'trino://{username}@host') 9 | _, cparams = dialect.create_connect_args(u) 10 | 11 | assert cparams['user'] == username 12 | 13 | 14 | def test_trino_connection_string_session_user(): 15 | dialect = TrinoDialect() 16 | username = 'test-user' 17 | session_user = 'sess-user' 18 | u = url.make_url(f'trino://{username}@host/?sessionUser={session_user}') 19 | _, cparams = dialect.create_connect_args(u) 20 | 21 | assert cparams['user'] == session_user 22 | 23 | 24 | def test_trino_connection_jwt_token(): 25 | dialect = TrinoDialect() 26 | access_token = 'mock-token' 27 | u = url.make_url(f'trino://host/?accessToken={access_token}') 28 | _, cparams = dialect.create_connect_args(u) 29 | 30 | assert cparams['http_scheme'] == HTTPS 31 | assert isinstance(cparams['auth'], JWTAuthentication) 32 | assert cparams['auth'].token == access_token 33 | --------------------------------------------------------------------------------
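Putting the pieces together, a minimal end-to-end usage sketch (illustrative only: the host, catalog, schema, and table name below are placeholder assumptions, and a reachable Trino server is required):

import sqlalchemy_trino  # noqa: F401 -- importing registers the 'trino' dialect (see sqlalchemy_trino/__init__.py)
from sqlalchemy import create_engine, inspect

engine = create_engine('trino://user@example-host:8080/hive/default')
inspector = inspect(engine)
print(inspector.get_table_names())         # backed by TrinoDialect.get_table_names()
for column in inspector.get_columns('population'):
    print(column['name'], column['type'])  # types produced by datatype.parse_sqltype()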