├── .github └── workflows │ └── semgrep.yml ├── CHANGELOG ├── LICENSE.txt ├── README.rst ├── __init__.py ├── base.py ├── connector.py ├── example.py └── setup.py /.github/workflows/semgrep.yml: -------------------------------------------------------------------------------- 1 | 2 | on: 3 | pull_request: {} 4 | workflow_dispatch: {} 5 | push: 6 | branches: 7 | - main 8 | - master 9 | schedule: 10 | - cron: '0 0 * * *' 11 | name: Semgrep config 12 | jobs: 13 | semgrep: 14 | name: semgrep/ci 15 | runs-on: ubuntu-20.04 16 | env: 17 | SEMGREP_APP_TOKEN: ${{ secrets.SEMGREP_APP_TOKEN }} 18 | SEMGREP_URL: https://cloudflare.semgrep.dev 19 | SEMGREP_APP_URL: https://cloudflare.semgrep.dev 20 | SEMGREP_VERSION_CHECK_URL: https://cloudflare.semgrep.dev/api/check-version 21 | container: 22 | image: returntocorp/semgrep 23 | steps: 24 | - uses: actions/checkout@v3 25 | - run: semgrep ci 26 | -------------------------------------------------------------------------------- /CHANGELOG: -------------------------------------------------------------------------------- 1 | 0.1.5 2 | - quoting fix (Ramil Aglyautdinov) 3 | 0.1.4 4 | - Fix the db_type issue (@inpefess) 5 | - removed unnecessary debug statement (@RichRadics) 6 | 0.1.3 7 | - fix(connector): nullable types (Fadi Hadzh) 8 | - fix datetime db type (Ivan Borovkov) 9 | - Fix type discovery for Aggregated columns (Tobias Adamson) 10 | 0.1.2 11 | - Fixed escaping, executemany, nulltype support 12 | 0.1.1 13 | - Fix query result order reversed issue (@scheng-hds) 14 | - fix issue occurred when inheriting parent attributes (@lu828) 15 | 0.1.0 16 | - Basic ClickHouse syntax support, no DML/DDL 17 | - Support for tagged queries / query cancellation 18 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND 
CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. 
For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright (c) 2013-2016 Snowflake Computing, Inc. 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | sqlalchemy-clickhouse 2 | ===================== 3 | 4 | ClickHouse dialect for SQLAlchemy. 5 | 6 | Installation 7 | ------------ 8 | 9 | The package is installable through PIP:: 10 | 11 | pip install sqlalchemy-clickhouse 12 | 13 | Usage 14 | ----- 15 | 16 | The DSN format is similar to that of regular Postgres:: 17 | 18 | >>> import sqlalchemy as sa 19 | >>> sa.create_engine('clickhouse://username:password@hostname:port/database') 20 | Engine('clickhouse://username:password@hostname:port/database') 21 | 22 | For SSL add ssl parameter to URL:: 23 | 24 | >>> import sqlalchemy as sa 25 | >>> sa.create_engine('clickhouse://username:password@hostname:port/database?ssl=True') 26 | Engine('clickhouse://username:password@hostname:port/database') 27 | 28 | It implements a dialect, so there's no user-facing API. 
29 | 30 | Testing 31 | ------- 32 | 33 | The dialect can be registered on runtime if you don't want to install it as:: 34 | 35 | from sqlalchemy.dialects import registry 36 | registry.register("clickhouse", "base", "dialect") 37 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from .base import VERSION 4 | 5 | # Export version 6 | __version__ = '.'.join('%d' % v for v in VERSION[0:3]) -------------------------------------------------------------------------------- /base.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Note: parts of the file come from https://github.com/snowflakedb/snowflake-sqlalchemy 4 | # licensed under the same Apache 2.0 License 5 | 6 | import re 7 | 8 | import sqlalchemy.types as sqltypes 9 | from sqlalchemy import exc as sa_exc 10 | from sqlalchemy import util as sa_util 11 | from sqlalchemy.engine import default, reflection 12 | from sqlalchemy.sql import compiler, expression 13 | from sqlalchemy.sql.elements import quoted_name 14 | from sqlalchemy.dialects.postgresql.base import PGCompiler, PGIdentifierPreparer 15 | from sqlalchemy.types import ( 16 | CHAR, DATE, DATETIME, INTEGER, SMALLINT, BIGINT, DECIMAL, TIME, 17 | TIMESTAMP, VARCHAR, BINARY, BOOLEAN, FLOAT, REAL) 18 | 19 | # Export connector version 20 | VERSION = (0, 1, 0, None) 21 | 22 | # Column spec 23 | colspecs = {} 24 | 25 | # Type decorators 26 | class ARRAY(sqltypes.TypeEngine): 27 | __visit_name__ = 'ARRAY' 28 | 29 | # Type converters 30 | ischema_names = { 31 | 'Int64': INTEGER, 32 | 'Int32': INTEGER, 33 | 'Int16': INTEGER, 34 | 'Int8': INTEGER, 35 | 'UInt64': INTEGER, 36 | 'UInt32': INTEGER, 37 | 'UInt16': INTEGER, 38 | 'UInt8': INTEGER, 39 | 'Date': DATE, 40 | 'DateTime': DATETIME, 41 | 'Float64': FLOAT, 42 | 'Float32': FLOAT, 43 | 
class ClickHouseCompiler(PGCompiler):
    """Statement compiler translating SQLAlchemy expressions into
    ClickHouse SQL, reusing the PostgreSQL compiler where possible."""

    def visit_count_func(self, fn, **kw):
        # ClickHouse spells COUNT in lowercase: count(...)
        return 'count{0}'.format(self.process(fn.clause_expr, **kw))

    def visit_random_func(self, fn, **kw):
        return 'rand()'

    def visit_now_func(self, fn, **kw):
        return 'now()'

    def visit_current_date_func(self, fn, **kw):
        return 'today()'

    def visit_true(self, element, **kw):
        # ClickHouse has no boolean literals; UInt8 1/0 are used instead.
        return '1'

    def visit_false(self, element, **kw):
        return '0'

    def visit_cast(self, cast, **kwargs):
        if self.dialect.supports_cast:
            return super(ClickHouseCompiler, self).visit_cast(cast, **kwargs)
        else:
            # Fall back to the bare expression, dropping the CAST wrapper.
            return self.process(cast.clause, **kwargs)

    def visit_substring_func(self, func, **kw):
        s = self.process(func.clauses.clauses[0], **kw)
        start = self.process(func.clauses.clauses[1], **kw)
        if len(func.clauses.clauses) > 2:
            length = self.process(func.clauses.clauses[2], **kw)
            return "substring(%s, %s, %s)" % (s, start, length)
        else:
            return "substring(%s, %s)" % (s, start)

    def visit_concat_op_binary(self, binary, operator, **kw):
        # Fix: forward **kw (e.g. literal_binds) like every other visitor;
        # previously the options were silently dropped for both operands.
        return "concat(%s, %s)" % (
            self.process(binary.left, **kw),
            self.process(binary.right, **kw))

    def visit_in_op_binary(self, binary, operator, **kw):
        # IN lists must be rendered as literals, not bind parameters.
        kw['literal_binds'] = True
        return '%s IN %s' % (
            self.process(binary.left, **kw),
            self.process(binary.right, **kw)
        )

    def visit_notin_op_binary(self, binary, operator, **kw):
        kw['literal_binds'] = True
        return '%s NOT IN %s' % (
            self.process(binary.left, **kw),
            self.process(binary.right, **kw)
        )

    def visit_column(self, column, add_to_result_map=None,
                     include_table=True, **kwargs):
        # Columns prefixed with the table name are not supported, so the
        # table prefix is always suppressed regardless of the caller's flag.
        return super(ClickHouseCompiler, self).visit_column(
            column, add_to_result_map=add_to_result_map,
            include_table=False, **kwargs)

    def render_literal_value(self, value, type_):
        """Wrap date/datetime literals in the ClickHouse conversion
        functions so string literals compare correctly."""
        value = super(ClickHouseCompiler, self).render_literal_value(value, type_)
        if isinstance(type_, sqltypes.DateTime):
            value = 'toDateTime(%s)' % value
        if isinstance(type_, sqltypes.Date):
            value = 'toDate(%s)' % value
        return value

    def limit_clause(self, select, **kw):
        """Render LIMIT/OFFSET using ClickHouse's MySQL-style
        ``LIMIT offset, row_count`` form.

        Fixes two defects: the previous version referenced an unimported
        ``sql`` name (NameError as soon as OFFSET was used — the module
        imports ``expression`` from ``sqlalchemy.sql``), and it emitted the
        offset in the row-count position (``LIMIT 0, offset``), silently
        discarding the real limit.
        """
        text = ''
        if select._offset_clause is not None:
            # LIMIT <offset>, <row_count>; -1 preserves the original
            # author's "no upper bound" sentinel when no limit is given.
            text = '\n LIMIT ' + self.process(select._offset_clause, **kw) + ', '
            if select._limit_clause is not None:
                text += self.process(select._limit_clause, **kw)
            else:
                text += self.process(expression.literal(-1), **kw)
        elif select._limit_clause is not None:
            text = '\n LIMIT ' + self.process(select._limit_clause, **kw)
        return text

    def for_update_clause(self, select, **kw):
        return ''  # SELECT ... FOR UPDATE is not supported
supports_native_boolean = True 160 | supports_alter = True 161 | supports_sequences = False 162 | supports_native_enum = True 163 | 164 | max_identifier_length = 127 165 | default_paramstyle = 'pyformat' 166 | colspecs = colspecs 167 | ischema_names = ischema_names 168 | convert_unicode = True 169 | returns_unicode_strings = True 170 | description_encoding = None 171 | postfetch_lastrowid = False 172 | 173 | preparer = ClickHouseIdentifierPreparer 174 | type_compiler = ClickHouseTypeCompiler 175 | statement_compiler = ClickHouseCompiler 176 | execution_ctx_cls = ClickHouseExecutionContext 177 | 178 | # Required for PG-based compiler 179 | _backslash_escapes = True 180 | 181 | @classmethod 182 | def dbapi(cls): 183 | try: 184 | import sqlalchemy_clickhouse.connector as connector 185 | except: 186 | import connector 187 | return connector 188 | 189 | def create_connect_args(self, url): 190 | kwargs = { 191 | 'db_url': 'http://%s:%d/' % (url.host, url.port or 8123), 192 | 'username': url.username, 193 | 'password': url.password, 194 | } 195 | kwargs.update(url.query) 196 | return ([url.database or 'default'], kwargs) 197 | 198 | def _get_default_schema_name(self, connection): 199 | return connection.scalar("select currentDatabase()") 200 | 201 | def get_schema_names(self, connection, **kw): 202 | return [row.name for row in connection.execute('SHOW DATABASES')] 203 | 204 | def get_view_names(self, connection, schema=None, **kw): 205 | return self.get_table_names(connection, schema, **kw) 206 | 207 | def _get_table_columns(self, connection, table_name, schema): 208 | full_table = table_name 209 | if schema: 210 | full_table = schema + '.' + table_name 211 | # This needs the table name to be unescaped (no backticks). 212 | return connection.execute('DESCRIBE TABLE {}'.format(full_table)).fetchall() 213 | 214 | def has_table(self, connection, table_name, schema=None): 215 | full_table = table_name 216 | if schema: 217 | full_table = schema + '.' 
+ table_name 218 | for r in connection.execute('EXISTS TABLE {}'.format(full_table)): 219 | if r.result == 1: 220 | return True 221 | return False 222 | 223 | @reflection.cache 224 | def get_columns(self, connection, table_name, schema=None, **kw): 225 | rows = self._get_table_columns(connection, table_name, schema) 226 | result = [] 227 | for r in rows: 228 | col_name = r.name 229 | col_type = "" 230 | if r.type.startswith("AggregateFunction"): 231 | # Extract type information from a column 232 | # using AggregateFunction 233 | # the type from clickhouse will be 234 | # AggregateFunction(sum, Int64) for an Int64 type 235 | # remove first 24 chars and remove the last one to get Int64 236 | col_type = r.type[23:-1] 237 | elif r.type.startswith("Nullable"): 238 | col_type = re.search(r'^\w+', r.type[9:-1]).group(0) 239 | else: 240 | # Take out the more detailed type information 241 | # e.g. 'map' -> 'map' 242 | # 'decimal(10,1)' -> decimal 243 | col_type = re.search(r'^\w+', r.type).group(0) 244 | try: 245 | coltype = ischema_names[col_type] 246 | except KeyError: 247 | coltype = sqltypes.NullType 248 | result.append({ 249 | 'name': col_name, 250 | 'type': coltype, 251 | 'nullable': True, 252 | 'default': None, 253 | }) 254 | return result 255 | 256 | @reflection.cache 257 | def get_foreign_keys(self, connection, table_name, schema=None, **kw): 258 | # No support for foreign keys. 259 | return [] 260 | 261 | @reflection.cache 262 | def get_pk_constraint(self, connection, table_name, schema=None, **kw): 263 | # No support for primary keys. 264 | return [] 265 | 266 | @reflection.cache 267 | def get_indexes(self, connection, table_name, schema=None, **kw): 268 | full_table = table_name 269 | if schema: 270 | full_table = schema + '.' 
+ table_name 271 | # We must get the full table creation STMT to parse engine and partitions 272 | rows = [r for r in connection.execute('SHOW CREATE TABLE {}'.format(full_table))] 273 | if len(rows) < 1: 274 | return [] 275 | # VIEWs are not going to have ENGINE associated, there is no good way how to 276 | # determine partitioning columns (or indexes) 277 | engine_spec = re.search(r'ENGINE = (\w+)\((.+)\)', rows[0].statement) 278 | if not engine_spec: 279 | return [] 280 | engine, params = engine_spec.group(1,2) 281 | # Handle partition columns 282 | cols = re.search(r'\((.+)\)', params) 283 | if not cols: 284 | return [] 285 | col_names = [c.strip() for c in cols.group(1).split(',')] 286 | return [{'name': 'partition', 'column_names': col_names, 'unique': False}] 287 | 288 | @reflection.cache 289 | def get_table_names(self, connection, schema=None, **kw): 290 | query = 'SHOW TABLES' 291 | if schema: 292 | query += ' FROM ' + schema 293 | return [row.name for row in connection.execute(query)] 294 | 295 | def do_rollback(self, dbapi_connection): 296 | # No transactions 297 | pass 298 | 299 | def _check_unicode_returns(self, connection, additional_tests=None): 300 | # We decode everything as UTF-8 301 | return True 302 | 303 | def _check_unicode_description(self, connection): 304 | # We decode everything as UTF-8 305 | return True 306 | 307 | dialect = ClickHouseDialect 308 | -------------------------------------------------------------------------------- /connector.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # See http://www.python.org/dev/peps/pep-0249/ 4 | # 5 | # Many docstrings in this file are based on the PEP, which is in the public domain. 
from datetime import date, datetime

# Python 2/3 compatibility: alias ``basestring`` to ``str`` on Python 3 so
# the isinstance checks below work on both interpreters.
try:
    basestring
except NameError:
    basestring = str


class ParamEscaper(object):
    """Escape Python values into ClickHouse SQL literals for %-style
    parameter substitution."""

    def escape_args(self, parameters):
        """Escape a dict, list or tuple of query parameters.

        Any other container type raises so malformed parameters fail
        loudly instead of producing broken SQL.
        """
        if isinstance(parameters, dict):
            return {k: self.escape_item(v) for k, v in parameters.items()}
        elif isinstance(parameters, (list, tuple)):
            return tuple(self.escape_item(x) for x in parameters)
        else:
            raise Exception("Unsupported param format: {}".format(parameters))

    def escape_number(self, item):
        # Numbers are rendered as-is by %-formatting; nothing to quote.
        return item

    def escape_string(self, item):
        # Need to decode UTF-8 because of old sqlalchemy.
        # Newer SQLAlchemy checks dialect.supports_unicode_binds before
        # encoding Unicode strings as byte strings.  The old version always
        # encodes Unicode as byte strings, which breaks string formatting
        # here.
        if isinstance(item, bytes):
            item = item.decode('utf-8')
        # '$' is doubled to survive later %-substitution / server-side
        # parameter expansion (see CHANGELOG 0.1.5 "quoting fix").
        return "'{}'".format(
            item.replace("\\", "\\\\").replace("'", "\\'").replace("$", "$$"))

    def escape_item(self, item):
        """Render a single Python value as a ClickHouse SQL literal."""
        if item is None:
            return 'NULL'
        elif isinstance(item, bool):
            # Fix: bool is a subclass of int, so it previously fell through
            # to escape_number and %-formatted as the invalid literal
            # 'True'/'False'.  Render it as UInt8 1/0 instead.
            return 1 if item else 0
        elif isinstance(item, (int, float)):
            return self.escape_number(item)
        elif isinstance(item, basestring):
            return self.escape_string(item)
        elif isinstance(item, datetime):
            # datetime must be checked before date (datetime is a subclass).
            return self.escape_string(item.strftime("%Y-%m-%d %H:%M:%S"))
        elif isinstance(item, date):
            # Fix: plain dates previously raised "Unsupported object".
            return self.escape_string(item.strftime("%Y-%m-%d"))
        else:
            raise Exception("Unsupported object {}".format(item))
class Connection(Database):
    """
    These objects are small stateless factories for cursors, which do all
    the real work.
    """
    def __init__(self, db_name, db_url='http://localhost:8123/',
                 username=None, password=None, readonly=False, ssl="False"):
        # ``ssl`` arrives as the string "True"/"False" when parsed from the
        # DSN query string, but callers constructing a Connection directly
        # may pass a real bool.  Fix: normalise via str() first — the
        # previous code called ``ssl.upper()`` and crashed with
        # AttributeError on a bool.
        ssl_flag = str(ssl).upper()
        if ssl_flag == "TRUE":
            # Fix: upgrade only the scheme prefix.  A blanket
            # ``replace("http", "https")`` turned an already-secure
            # 'https://...' URL into the invalid 'httpss://...'.
            if db_url.startswith('http://'):
                db_url = 'https://' + db_url[len('http://'):]
        elif ssl_flag == "FALSE":
            pass
        else:
            raise ValueError("Not Supported value of ssl parameter, only True/False values are accepted")
        super(Connection, self).__init__(db_name, db_url, username, password, readonly)
        self.db_name = db_name
        self.db_url = db_url
        self.username = username
        self.password = password
        self.readonly = readonly

    def close(self):
        """No persistent resources are held per-connection; no-op."""
        pass

    def commit(self):
        """ClickHouse has no transactions; commit is a no-op."""
        pass

    def cursor(self):
        """Return a new Cursor bound to this connection."""
        return Cursor(self)

    def rollback(self):
        # NOTE(review): NotSupportedError is not defined anywhere in the
        # visible module (only ``Error`` is) — presumably it should be part
        # of the PEP 249 exception hierarchy; confirm before relying on it.
        raise NotSupportedError("Transactions are not supported")  # pragma: no cover
160 | """ 161 | _STATE_NONE = 0 162 | _STATE_RUNNING = 1 163 | _STATE_FINISHED = 2 164 | 165 | def __init__(self, database): 166 | self._db = database 167 | self._reset_state() 168 | self._arraysize = 1 169 | 170 | def _reset_state(self): 171 | """Reset state about the previous query in preparation for running another query""" 172 | self._uuid = None 173 | self._columns = None 174 | self._rownumber = 0 175 | # Internal helper state 176 | self._state = self._STATE_NONE 177 | self._data = None 178 | self._columns = None 179 | 180 | @property 181 | def rowcount(self): 182 | """By default, return -1 to indicate that this is not supported.""" 183 | return -1 184 | 185 | @property 186 | def description(self): 187 | """This read-only attribute is a sequence of 7-item sequences. 188 | 189 | Each of these sequences contains information describing one result column: 190 | 191 | - name 192 | - type_code 193 | - display_size (None in current implementation) 194 | - internal_size (None in current implementation) 195 | - precision (None in current implementation) 196 | - scale (None in current implementation) 197 | - null_ok (always True in current implementation) 198 | 199 | The ``type_code`` can be interpreted by comparing it to the Type Objects specified in the 200 | section below. 201 | """ 202 | # Sleep until we're done or we got the columns 203 | if self._columns is None: 204 | return [] 205 | return [ 206 | # name, type_code, display_size, internal_size, precision, scale, null_ok 207 | (col[0], col[1], None, None, None, None, True) for col in self._columns 208 | ] 209 | 210 | def close(self): 211 | pass 212 | 213 | def execute(self, operation, parameters=None, is_response=True): 214 | """Prepare and execute a database operation (query or command). 
""" 215 | if parameters is None or not parameters: 216 | sql = operation 217 | else: 218 | sql = operation % _escaper.escape_args(parameters) 219 | 220 | self._reset_state() 221 | 222 | self._state = self._STATE_RUNNING 223 | self._uuid = uuid.uuid1() 224 | 225 | if is_response: 226 | response = self._db.select(sql, settings={'query_id': self._uuid}) 227 | self._process_response(response) 228 | else: 229 | self._db.raw(sql) 230 | 231 | def executemany(self, operation, seq_of_parameters): 232 | """Prepare a database operation (query or command) and then execute it against all parameter 233 | sequences or mappings found in the sequence ``seq_of_parameters``. 234 | 235 | Only the final result set is retained. 236 | 237 | Return values are not defined. 238 | """ 239 | values_list = [] 240 | RE_INSERT_VALUES = re.compile( 241 | r"\s*((?:INSERT|REPLACE)\s.+\sVALUES?\s*)" + 242 | r"(\(\s*(?:%s|%\(.+\)s)\s*(?:,\s*(?:%s|%\(.+\)s)\s*)*\))" + 243 | r"(\s*(?:ON DUPLICATE.*)?);?\s*\Z", 244 | re.IGNORECASE | re.DOTALL) 245 | 246 | m = RE_INSERT_VALUES.match(operation) 247 | if m: 248 | q_prefix = m.group(1) % () 249 | q_values = m.group(2).rstrip() 250 | 251 | for parameters in seq_of_parameters[:-1]: 252 | values_list.append(q_values % _escaper.escape_args(parameters)) 253 | query = '{} {};'.format(q_prefix, ','.join(values_list)) 254 | return self._db.raw(query) 255 | for parameters in seq_of_parameters[:-1]: 256 | self.execute(operation, parameters, is_response=False) 257 | 258 | def fetchone(self): 259 | """Fetch the next row of a query result set, returning a single sequence, or ``None`` when 260 | no more data is available. """ 261 | if self._state == self._STATE_NONE: 262 | raise Exception("No query yet") 263 | if not self._data: 264 | return None 265 | else: 266 | self._rownumber += 1 267 | return self._data.pop(0) 268 | 269 | def fetchmany(self, size=None): 270 | """Fetch the next set of rows of a query result, returning a sequence of sequences (e.g. 
a 271 | list of tuples). An empty sequence is returned when no more rows are available. 272 | 273 | The number of rows to fetch per call is specified by the parameter. If it is not given, the 274 | cursor's arraysize determines the number of rows to be fetched. The method should try to 275 | fetch as many rows as indicated by the size parameter. If this is not possible due to the 276 | specified number of rows not being available, fewer rows may be returned. 277 | """ 278 | if self._state == self._STATE_NONE: 279 | raise Exception("No query yet") 280 | 281 | if size is None: 282 | size = 1 283 | 284 | if not self._data: 285 | return [] 286 | else: 287 | if len(self._data) > size: 288 | result, self._data = self._data[:size], self._data[size:] 289 | else: 290 | result, self._data = self._data, [] 291 | self._rownumber += len(result) 292 | return result 293 | 294 | def fetchall(self): 295 | """Fetch all (remaining) rows of a query result, returning them as a sequence of sequences 296 | (e.g. a list of tuples). 297 | """ 298 | if self._state == self._STATE_NONE: 299 | raise Exception("No query yet") 300 | 301 | if not self._data: 302 | return [] 303 | else: 304 | result, self._data = self._data, [] 305 | self._rownumber += len(result) 306 | return result 307 | 308 | @property 309 | def arraysize(self): 310 | """This read/write attribute specifies the number of rows to fetch at a time with 311 | :py:meth:`fetchmany`. It defaults to 1 meaning to fetch a single row at a time. 312 | """ 313 | return self._arraysize 314 | 315 | @arraysize.setter 316 | def arraysize(self, value): 317 | self._arraysize = value 318 | 319 | def setinputsizes(self, sizes): 320 | """Does nothing by default""" 321 | pass 322 | 323 | def setoutputsize(self, size, column=None): 324 | """Does nothing by default""" 325 | pass 326 | 327 | def __next__(self): 328 | """Return the next row from the currently executing SQL statement using the same semantics 329 | as :py:meth:`fetchone`. 
A ``StopIteration`` exception is raised when the result set is 330 | exhausted. 331 | """ 332 | one = self.fetchone() 333 | if one is None: 334 | raise StopIteration 335 | else: 336 | return one 337 | 338 | next = __next__ 339 | 340 | def __iter__(self): 341 | """Return self to make cursors compatible to the iteration protocol.""" 342 | return self 343 | 344 | def cancel(self): 345 | if self._state == self._STATE_NONE: 346 | raise ProgrammingError("No query yet") 347 | if self._uuid is None: 348 | assert self._state == self._STATE_FINISHED, "Query should be finished" 349 | return 350 | # Replace current running query to cancel it 351 | self._db.select("SELECT 1", settings={"query_id":self._uuid}) 352 | self._state = self._STATE_FINISHED 353 | self._uuid = None 354 | self._data = None 355 | 356 | def poll(self): 357 | pass 358 | 359 | def _process_response(self, response): 360 | """ Update the internal state with the data from the response """ 361 | assert self._state == self._STATE_RUNNING, "Should be running if processing response" 362 | cols = None 363 | data = [] 364 | for r in response: 365 | if not cols: 366 | cols = [(f, r._fields[f].db_type) for f in r._fields] 367 | data.append([getattr(r, f) for f in r._fields]) 368 | self._data = data 369 | self._columns = cols 370 | self._state = self._STATE_FINISHED 371 | -------------------------------------------------------------------------------- /example.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Use connector directly 4 | import connector 5 | cursor = connector.connect('default').cursor() 6 | cursor.execute('SELECT * FROM test LIMIT 10') 7 | print(cursor.fetchone()) 8 | 9 | # Register SQLAlchemy dialect 10 | from sqlalchemy.dialects import registry 11 | registry.register("clickhouse", "base", "dialect") 12 | 13 | # Test engine and table 14 | from sqlalchemy import * 15 | from sqlalchemy.engine import create_engine 16 | from 
engine = create_engine('clickhouse://default:@localhost:8123/default')
logs = Table('test', MetaData(bind=engine), autoload=True)
print(select([func.count('*')], from_obj=logs).scalar())

# --------------------------------------------------------------------------------
# /setup.py:
# --------------------------------------------------------------------------------

#!/usr/bin/env python

from os import path, getenv
from setuptools import setup
from codecs import open

# Package version; rendered as a dotted string for setup() below.
VERSION = [0, 1, 5]
readme = open('README.rst').read()

# Trove classifiers describing the supported environments and audience.
CLASSIFIERS = [
    'Development Status :: 5 - Production/Stable',

    'Environment :: Console',
    'Environment :: Other Environment',

    'Intended Audience :: Developers',
    'Intended Audience :: Education',
    'Intended Audience :: Information Technology',
    'Intended Audience :: System Administrators',

    'License :: OSI Approved :: Apache Software License',

    'Operating System :: OS Independent',

    'Programming Language :: SQL',
    'Programming Language :: Python :: 2.7',
    'Programming Language :: Python :: 3.4',
    'Programming Language :: Python :: 3.5',

    'Topic :: Database',
    'Topic :: Software Development',
    'Topic :: Software Development :: Libraries',
    'Topic :: Software Development :: Libraries :: Application Frameworks',
    'Topic :: Software Development :: Libraries :: Python Modules',
]

setup(
    name='sqlalchemy-clickhouse',
    version='.'.join(str(v) for v in VERSION[:3]),
    description='ClickHouse SQLAlchemy Dialect',
    long_description=readme,
    author='Cloudflare, Inc.',
    author_email='mvavrusa@cloudflare.com',
    license='Apache License, Version 2.0',
    url='https://github.com/cloudflare/sqlalchemy-clickhouse',
    keywords="db database cloud analytics clickhouse",
    download_url='https://github.com/cloudflare/sqlalchemy-clickhouse/releases/tag/v0.1.5',
    install_requires=[
        'sqlalchemy>=1.0.0',
        'infi.clickhouse_orm>=1.2.0',
    ],
    packages=[
        'sqlalchemy_clickhouse',
    ],
    package_dir={
        'sqlalchemy_clickhouse': '.',
    },
    package_data={
        'sqlalchemy_clickhouse': ['LICENSE.txt'],
    },
    entry_points={
        'sqlalchemy.dialects': [
            'clickhouse=sqlalchemy_clickhouse.base',
        ]
    },
    classifiers=CLASSIFIERS,
)