├── tests ├── unit │ ├── __init__.py │ ├── hooks │ │ ├── __init__.py │ │ ├── test_clickhouse_dbapi.py │ │ └── test_clickhouse.py │ ├── operators │ │ ├── __init__.py │ │ ├── test_clickhouse_dbapi.py │ │ └── test_clickhouse.py │ └── sensors │ │ ├── __init__.py │ │ ├── test_clickhouse_dbapi.py │ │ └── test_clickhouse.py ├── integration │ ├── __init__.py │ └── hooks │ │ ├── __init__.py │ │ ├── test_clickhouse_dbapi.py │ │ └── test_clickhouse.py └── __init__.py ├── requirements.txt ├── .github ├── dependabot.yml └── workflows │ ├── publish-to-pypi.yml │ └── tests.yml ├── .gitignore ├── src └── airflow_clickhouse_plugin │ ├── sensors │ ├── clickhouse_dbapi.py │ └── clickhouse.py │ ├── hooks │ ├── clickhouse_dbapi.py │ └── clickhouse.py │ └── operators │ ├── clickhouse_dbapi.py │ └── clickhouse.py ├── LICENSE ├── pyproject.toml └── README.md /tests/unit/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/integration/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit/operators/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit/sensors/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/integration/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | clickhouse-driver~=0.2.9 2 | apache-airflow>=2.0.0,<3.2.0 3 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | tests 3 | ~~~~~ 4 | 5 | Test suite for the airflow_clickhouse_plugin package. 
6 | """ 7 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # visual studio code 2 | .vscode/ 3 | 4 | # JetBrains IDEs 5 | .idea/ 6 | .fleet/ 7 | 8 | # python 9 | venv/ 10 | __pycache__/ 11 | *.egg-info/ 12 | 13 | # misc 14 | .DS_Store 15 | -------------------------------------------------------------------------------- /src/airflow_clickhouse_plugin/sensors/clickhouse_dbapi.py: -------------------------------------------------------------------------------- 1 | from airflow.providers.common.sql.sensors.sql import SqlSensor 2 | 3 | from airflow_clickhouse_plugin.hooks.clickhouse_dbapi import ClickHouseDbApiHook 4 | from airflow_clickhouse_plugin.operators.clickhouse_dbapi import \ 5 | ClickHouseDbApiHookMixin 6 | 7 | 8 | class ClickHouseSqlSensor(ClickHouseDbApiHookMixin, SqlSensor): 9 | def _get_hook(self) -> ClickHouseDbApiHook: 10 | return self._get_clickhouse_db_api_hook() 11 | -------------------------------------------------------------------------------- /tests/integration/hooks/test_clickhouse_dbapi.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from airflow_clickhouse_plugin.hooks.clickhouse_dbapi import ClickHouseDbApiHook 4 | 5 | 6 | class ClickHouseDbApiHookTestCase(unittest.TestCase): 7 | def test_get_records(self): 8 | records = ClickHouseDbApiHook().get_records( 9 | ''' 10 | SELECT number * %(multiplier)s AS output 11 | FROM system.numbers 12 | LIMIT 1 OFFSET 1 13 | ''', 14 | parameters={'multiplier': 2}, 15 | ) 16 | self.assertListEqual([(2,)], records) 17 | 18 | 19 | if __name__ == '__main__': 20 | unittest.main() 21 | -------------------------------------------------------------------------------- /src/airflow_clickhouse_plugin/hooks/clickhouse_dbapi.py: -------------------------------------------------------------------------------- 1 | import clickhouse_driver 2 | from airflow.providers.common.sql.hooks.sql import DbApiHook 3 | 4 | from airflow_clickhouse_plugin.hooks.clickhouse import conn_to_kwargs, \ 5 | default_conn_name 6 | 7 | 8 | class ClickHouseDbApiHook(DbApiHook): 9 | conn_name_attr = 'clickhouse_conn_id' 10 | clickhouse_conn_id: str # set by DbApiHook.__init__ 11 | default_conn_name = default_conn_name 12 | 13 | def __init__(self, *args, schema: str = None, **kwargs): 14 | super().__init__(*args, **kwargs) 15 | self._schema = schema 16 | 17 | def get_conn(self) -> clickhouse_driver.dbapi.Connection: 18 | airflow_conn = self.get_connection(self.clickhouse_conn_id) 19 | return clickhouse_driver.dbapi \ 20 | .connect(**conn_to_kwargs(airflow_conn, self._schema)) 21 | -------------------------------------------------------------------------------- /tests/integration/hooks/test_clickhouse.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from airflow_clickhouse_plugin.hooks.clickhouse import ClickHouseHook 4 | 5 | 6 | class ClickHouseHookTestCase(unittest.TestCase): 7 | def test_execute(self): 8 | return_value = ClickHouseHook().execute( 9 | 'SELECT sum(value) * %(multiplier)s AS output FROM ext', 
10 | params={'multiplier': 2}, 11 | with_column_types=True, 12 | external_tables=[{ 13 | 'name': 'ext', 14 | 'structure': [('value', 'Int32')], 15 | 'data': [{'value': 1}, {'value': 2}], 16 | }], 17 | query_id='airflow-clickhouse-plugin-test', 18 | types_check=True, 19 | columnar=True, 20 | ) 21 | self.assertTupleEqual(([(6,)], [('output', 'Int64')]), return_value) 22 | 23 | 24 | if __name__ == '__main__': 25 | unittest.main() 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2019-2023 Anton Bryzgalov, tony.bryzgaloff@gmail.com 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/airflow_clickhouse_plugin/sensors/clickhouse.py: -------------------------------------------------------------------------------- 1 | import typing as t 2 | 3 | from airflow.exceptions import AirflowException 4 | from airflow.sensors.base import BaseSensorOperator 5 | 6 | from airflow_clickhouse_plugin.hooks.clickhouse import ExecuteReturnT 7 | from airflow_clickhouse_plugin.operators.clickhouse import \ 8 | BaseClickHouseOperator 9 | 10 | 11 | class ClickHouseSensor(BaseClickHouseOperator, BaseSensorOperator): 12 | """ Pokes using clickhouse_driver.Client.execute. 
""" 13 | 14 | def __init__( 15 | self, 16 | *args, 17 | is_failure: t.Callable[[ExecuteReturnT], bool] = None, 18 | is_success: t.Callable[[ExecuteReturnT], bool] = None, 19 | **kwargs, 20 | ): 21 | super().__init__(*args, **kwargs) 22 | self._is_failure = is_failure 23 | self._is_success = bool if is_success is None else is_success 24 | 25 | def poke(self, context: dict) -> bool: 26 | result = self._hook_execute() 27 | if self._is_failure is not None: 28 | is_failure = self._is_failure(result) 29 | if is_failure: 30 | raise AirflowException(f'is_failure returned {is_failure}') 31 | return self._is_success(result) 32 | -------------------------------------------------------------------------------- /tests/unit/operators/test_clickhouse_dbapi.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from unittest import mock 3 | 4 | from airflow_clickhouse_plugin.operators.clickhouse_dbapi import \ 5 | ClickHouseBaseDbApiOperator 6 | 7 | 8 | class ClickHouseBaseDbApiOperatorTestCase(unittest.TestCase): 9 | def test_arguments(self): 10 | return_value = ClickHouseBaseDbApiOperator( 11 | task_id='test1', # required by Airflow 12 | conn_id='test-conn-id', 13 | database='test-database', 14 | hook_params={'test_param': 'test-param-value'}, 15 | ).get_db_hook() 16 | self._hook_cls_mock.assert_called_once_with( 17 | clickhouse_conn_id='test-conn-id', 18 | schema='test-database', 19 | test_param='test-param-value', 20 | ) 21 | self.assertIs(return_value, self._hook_cls_mock.return_value) 22 | 23 | def test_defaults(self): 24 | ClickHouseBaseDbApiOperator( 25 | task_id='test2', # required by Airflow 26 | ).get_db_hook() 27 | self._hook_cls_mock.assert_called_once_with(schema=None) 28 | 29 | def setUp(self): 30 | self._hook_cls_patcher = mock.patch('.'.join(( 31 | 'airflow_clickhouse_plugin.operators', 32 | 'clickhouse_dbapi.ClickHouseDbApiHook', 33 | ))) 34 | self._hook_cls_mock = self._hook_cls_patcher.start() 35 | 36 | def tearDown(self): 37 | self._hook_cls_patcher.stop() 38 | 39 | 40 | if __name__ == '__main__': 41 | unittest.main() 42 | -------------------------------------------------------------------------------- /tests/unit/sensors/test_clickhouse_dbapi.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from unittest import mock 3 | 4 | from airflow_clickhouse_plugin.sensors.clickhouse_dbapi import \ 5 | ClickHouseSqlSensor 6 | 7 | 8 | class ClickHouseSqlSensorTestCase(unittest.TestCase): 9 | def test_arguments(self): 10 | return_value = ClickHouseSqlSensor( 11 | task_id='test1', # required by Airflow 12 | sql='SELECT 1', # required by SqlSensor 13 | conn_id='test-conn-id', 14 | hook_params={'test_param': 'test-param-value'}, 15 | )._get_hook() 16 | self._hook_cls_mock.assert_called_once_with( 17 | clickhouse_conn_id='test-conn-id', 18 | test_param='test-param-value', 19 | ) 20 | self.assertIs(return_value, self._hook_cls_mock.return_value) 21 | 22 | def test_defaults(self): 23 | ClickHouseSqlSensor( 24 | task_id='test2', # required by Airflow 25 | sql='SELECT 2', # required by SqlSensor 26 | conn_id='test-conn-id', # required by SqlSensor 27 | )._get_hook() 28 | self._hook_cls_mock.assert_called_once_with( 29 | clickhouse_conn_id='test-conn-id', 30 | ) 31 | 32 | def setUp(self): 33 | self._hook_cls_patcher = mock.patch('.'.join(( 34 | 'airflow_clickhouse_plugin.operators', 35 | 'clickhouse_dbapi.ClickHouseDbApiHook', 36 | ))) 37 | self._hook_cls_mock = self._hook_cls_patcher.start() 
38 | 39 | def tearDown(self): 40 | self._hook_cls_patcher.stop() 41 | 42 | 43 | if __name__ == '__main__': 44 | unittest.main() 45 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools", 4 | ] 5 | build-backend = "setuptools.build_meta" 6 | 7 | [project] 8 | name = "airflow-clickhouse-plugin" 9 | version = "1.6.0" 10 | description = "airflow-clickhouse-plugin — Airflow plugin to execute ClickHouse commands and queries" 11 | readme = "README.md" 12 | requires-python = ">=3.9" 13 | license = { text = "MIT License" } 14 | authors = [ 15 | { name = "Anton Bryzgalov, Viktor Taranenko", email = "tony.bryzgaloff@gmail.com" }, 16 | ] 17 | keywords = [ 18 | "clickhouse", 19 | "airflow", 20 | ] 21 | classifiers = [ 22 | "Development Status :: 5 - Production/Stable", 23 | "Environment :: Plugins", 24 | "Intended Audience :: Developers", 25 | "Programming Language :: Python :: 3.12", 26 | "Programming Language :: Python :: 3.11", 27 | "Programming Language :: Python :: 3.10", 28 | "Programming Language :: Python :: 3.9", 29 | ] 30 | dynamic = [ 31 | "dependencies", 32 | ] 33 | 34 | [project.urls] 35 | GitHub = "https://github.com/bryzgaloff/airflow-clickhouse-plugin" 36 | Documentation = "https://github.com/bryzgaloff/airflow-clickhouse-plugin#airflow-clickhouse-plugin" 37 | Changelog = "https://github.com/bryzgaloff/airflow-clickhouse-plugin/releases" 38 | Issues = "https://github.com/bryzgaloff/airflow-clickhouse-plugin/issues" 39 | 40 | [project.optional-dependencies] 41 | "common.sql" = [ 42 | "apache-airflow[common.sql]>=2.2.0,<3.2.0", 43 | "apache-airflow-providers-common-sql>=1.3.0", # introduces SQLExecuteQueryOperator 44 | "clickhouse-driver>=0.2.1", 45 | ] 46 | 47 | [tool.setuptools.dynamic] 48 | dependencies = { file = ["requirements.txt"] } 49 | 50 | [tool.setuptools.packages.find] 51 | where = ["src"] 52 | namespaces = true # add directories without __init__.py 53 | -------------------------------------------------------------------------------- /src/airflow_clickhouse_plugin/operators/clickhouse_dbapi.py: -------------------------------------------------------------------------------- 1 | import typing as t 2 | 3 | from airflow.providers.common.sql.operators import sql 4 | 5 | from airflow_clickhouse_plugin.hooks.clickhouse_dbapi import \ 6 | ClickHouseDbApiHook 7 | 8 | 9 | class ClickHouseDbApiHookMixin(object): 10 | # these attributes are defined in both BaseSQLOperator and SqlSensor 11 | conn_id: str 12 | hook_params: t.Optional[dict] 13 | 14 | def _get_clickhouse_db_api_hook(self, **extra_hook_params) -> ClickHouseDbApiHook: 15 | hook_kwargs = {} 16 | if self.conn_id is not None: 17 | hook_kwargs['clickhouse_conn_id'] = self.conn_id 18 | if self.hook_params is not None: 19 | hook_kwargs.update(self.hook_params) 20 | hook_kwargs.update(extra_hook_params) 21 | return ClickHouseDbApiHook(**hook_kwargs) 22 | 23 | 24 | class ClickHouseBaseDbApiOperator(ClickHouseDbApiHookMixin, sql.BaseSQLOperator): 25 | def get_db_hook(self) -> ClickHouseDbApiHook: 26 | return self._get_clickhouse_db_api_hook(schema=self.database) 27 | 28 | 29 | class ClickHouseSQLExecuteQueryOperator( 30 | ClickHouseBaseDbApiOperator, 31 | sql.SQLExecuteQueryOperator, 32 | ): 33 | pass 34 | 35 | 36 | class ClickHouseSQLColumnCheckOperator( 37 | ClickHouseBaseDbApiOperator, 38 | sql.SQLColumnCheckOperator, 39 | ): 40 | pass 41 | 42 | 43 | class 
ClickHouseSQLTableCheckOperator( 44 | ClickHouseBaseDbApiOperator, 45 | sql.SQLTableCheckOperator, 46 | ): 47 | pass 48 | 49 | 50 | class ClickHouseSQLCheckOperator( 51 | ClickHouseBaseDbApiOperator, 52 | sql.SQLCheckOperator, 53 | ): 54 | pass 55 | 56 | 57 | class ClickHouseSQLValueCheckOperator( 58 | ClickHouseBaseDbApiOperator, 59 | sql.SQLValueCheckOperator, 60 | ): 61 | pass 62 | 63 | 64 | class ClickHouseSQLIntervalCheckOperator( 65 | ClickHouseBaseDbApiOperator, 66 | sql.SQLIntervalCheckOperator, 67 | ): 68 | pass 69 | 70 | 71 | class ClickHouseSQLThresholdCheckOperator( 72 | ClickHouseBaseDbApiOperator, 73 | sql.SQLThresholdCheckOperator, 74 | ): 75 | pass 76 | 77 | 78 | class ClickHouseBranchSQLOperator( 79 | ClickHouseBaseDbApiOperator, 80 | sql.BranchSQLOperator, 81 | ): 82 | pass 83 | -------------------------------------------------------------------------------- /tests/unit/hooks/test_clickhouse_dbapi.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from unittest import mock 3 | 4 | from airflow.models import Connection 5 | 6 | from airflow_clickhouse_plugin.hooks.clickhouse_dbapi import \ 7 | ClickHouseDbApiHook 8 | 9 | 10 | class ClickHouseDbApiHookTestCase(unittest.TestCase): 11 | def test_definition(self): 12 | self.assertEqual('clickhouse_conn_id', ClickHouseDbApiHook.conn_name_attr) 13 | self.assertEqual('clickhouse_default', ClickHouseDbApiHook.default_conn_name) 14 | 15 | def test_arguments(self): 16 | self._get_connection_mock.return_value = Connection( 17 | conn_id='test-conn-id', 18 | host='test-host', 19 | port=1234, 20 | login='test-login', 21 | password='test-pass', 22 | schema='test-schema', 23 | extra='{"test_extra": "test-extra"}', 24 | ) 25 | return_value = ClickHouseDbApiHook( 26 | clickhouse_conn_id='test-conn-id', 27 | schema='test-schema', 28 | ).get_conn() 29 | self._get_connection_mock.assert_called_once_with('test-conn-id') 30 | self._connect_mock.assert_called_once_with( 31 | user='test-login', 32 | password='test-pass', 33 | host='test-host', 34 | port=1234, 35 | database='test-schema', 36 | test_extra='test-extra', 37 | ) 38 | self.assertIs(return_value, self._connect_mock.return_value) 39 | 40 | def test_defaults(self): 41 | self._get_connection_mock.return_value = Connection() 42 | ClickHouseDbApiHook().get_conn() 43 | self._get_connection_mock.assert_called_once_with('clickhouse_default') 44 | self._connect_mock.assert_called_once_with(host='localhost') 45 | 46 | def setUp(self) -> None: 47 | self._get_connection_patcher = \ 48 | mock.patch.object(ClickHouseDbApiHook, 'get_connection') 49 | self._get_connection_mock = self._get_connection_patcher.start() 50 | self._connect_patcher = mock.patch('clickhouse_driver.dbapi.connect') 51 | self._connect_mock = self._connect_patcher.start() 52 | 53 | def tearDown(self) -> None: 54 | self._get_connection_patcher.stop() 55 | self._connect_patcher.stop() 56 | 57 | 58 | if __name__ == '__main__': 59 | unittest.main() 60 | -------------------------------------------------------------------------------- /src/airflow_clickhouse_plugin/operators/clickhouse.py: -------------------------------------------------------------------------------- 1 | import typing as t 2 | 3 | from airflow.models import BaseOperator 4 | 5 | from airflow_clickhouse_plugin.hooks.clickhouse import ClickHouseHook, \ 6 | ExecuteParamsT, ExecuteReturnT, default_conn_name 7 | 8 | 9 | class BaseClickHouseOperator(BaseOperator): 10 | """ 11 | A superclass for operator classes. 
Defines __init__ with common arguments. 12 | 13 | Includes arguments of clickhouse_driver.Client.execute. 14 | """ 15 | 16 | template_fields = ( # all str-containing arguments 17 | '_sql', 18 | '_parameters', 19 | '_external_tables', 20 | '_query_id', 21 | '_settings', 22 | '_database', 23 | ) 24 | template_ext: t.Sequence[str] = ('.sql',) 25 | template_fields_renderers = { 26 | '_sql': 'sql', 27 | '_parameters': 'json', 28 | '_external_tables': 'json', 29 | '_settings': 'json', 30 | } 31 | 32 | def __init__( 33 | self, 34 | *args, 35 | sql: t.Union[str, t.Iterable[str]], 36 | # arguments of clickhouse_driver.Client.execute 37 | parameters: t.Optional[ExecuteParamsT] = None, 38 | with_column_types: bool = False, 39 | external_tables: t.Optional[t.List[dict]] = None, 40 | query_id: t.Optional[str] = None, 41 | settings: t.Dict[str, t.Any] = None, 42 | types_check: bool = False, 43 | columnar: bool = False, 44 | # arguments of ClickHouseHook.__init__ 45 | clickhouse_conn_id: str = default_conn_name, 46 | database: t.Optional[str] = None, 47 | **kwargs, 48 | ): 49 | super().__init__(*args, **kwargs) 50 | 51 | self._sql = sql 52 | 53 | self._parameters = parameters 54 | self._with_column_types = with_column_types 55 | self._external_tables = external_tables 56 | self._query_id = query_id 57 | self._settings = settings 58 | self._types_check = types_check 59 | self._columnar = columnar 60 | 61 | self._clickhouse_conn_id = clickhouse_conn_id 62 | self._database = database 63 | 64 | def _hook_execute(self) -> ExecuteReturnT: 65 | hook = ClickHouseHook( 66 | clickhouse_conn_id=self._clickhouse_conn_id, 67 | database=self._database, 68 | ) 69 | return hook.execute( 70 | self._sql, 71 | self._parameters, 72 | self._with_column_types, 73 | self._external_tables, 74 | self._query_id, 75 | self._settings, 76 | self._types_check, 77 | self._columnar, 78 | ) 79 | 80 | 81 | class ClickHouseOperator(BaseClickHouseOperator, BaseOperator): 82 | def execute(self, context: t.Dict[str, t.Any]) -> ExecuteReturnT: 83 | return self._hook_execute() 84 | -------------------------------------------------------------------------------- /tests/unit/operators/test_clickhouse.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from unittest import mock 3 | 4 | from airflow_clickhouse_plugin.operators.clickhouse import \ 5 | ClickHouseOperator 6 | 7 | 8 | class ClickHouseOperatorTestCase(unittest.TestCase): 9 | def test_arguments(self): 10 | return_value = ClickHouseOperator( 11 | task_id='test1', # required by Airflow 12 | sql='SELECT 1', 13 | parameters=[('test-param', 1)], 14 | with_column_types=True, 15 | external_tables=[{'name': 'ext'}], 16 | query_id='test-query-id', 17 | settings={'test-setting': 1}, 18 | types_check=True, 19 | columnar=True, 20 | clickhouse_conn_id='test-conn-id', 21 | database='test-database', 22 | ).execute(context={}) 23 | with self.subTest('ClickHouseHook.__init__'): 24 | self._hook_cls_mock.assert_called_once_with( 25 | clickhouse_conn_id='test-conn-id', 26 | database='test-database', 27 | ) 28 | with self.subTest('ClickHouseHook.execute'): 29 | self._hook_cls_mock.return_value.execute.assert_called_once_with( 30 | 'SELECT 1', 31 | [('test-param', 1)], 32 | True, 33 | [{'name': 'ext'}], 34 | 'test-query-id', 35 | {'test-setting': 1}, 36 | True, 37 | True, 38 | ) 39 | with self.subTest('return value'): 40 | self.assertIs( 41 | return_value, 42 | self._hook_cls_mock.return_value.execute.return_value, 43 | ) 44 | 45 | def 
test_defaults(self): 46 | ClickHouseOperator( 47 | task_id='test2', # required by Airflow 48 | sql='SELECT 2', 49 | ).execute(context={}) 50 | with self.subTest('ClickHouseHook.__init__'): 51 | self._hook_cls_mock.assert_called_once_with( 52 | clickhouse_conn_id='clickhouse_default', 53 | database=None, 54 | ) 55 | with self.subTest('ClickHouseHook.execute'): 56 | self._hook_cls_mock.return_value.execute.assert_called_once_with( 57 | 'SELECT 2', 58 | None, 59 | False, 60 | None, 61 | None, 62 | None, 63 | False, 64 | False, 65 | ) 66 | 67 | def setUp(self): 68 | self._hook_cls_patcher = mock.patch('.'.join(( 69 | 'airflow_clickhouse_plugin.operators', 70 | 'clickhouse.ClickHouseHook', 71 | ))) 72 | self._hook_cls_mock = self._hook_cls_patcher.start() 73 | 74 | def tearDown(self): 75 | self._hook_cls_patcher.stop() 76 | 77 | 78 | class ClickHouseOperatorClassTestCase(unittest.TestCase): 79 | def test_template_fields(self): 80 | self.assertSetEqual( 81 | { 82 | '_sql', 83 | '_parameters', 84 | '_external_tables', 85 | '_query_id', 86 | '_settings', 87 | '_database', 88 | }, 89 | frozenset(ClickHouseOperator.template_fields), 90 | ) 91 | 92 | 93 | if __name__ == '__main__': 94 | unittest.main() 95 | -------------------------------------------------------------------------------- /tests/unit/sensors/test_clickhouse.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from unittest import mock 3 | 4 | from airflow import AirflowException 5 | 6 | from airflow_clickhouse_plugin.sensors.clickhouse import ClickHouseSensor 7 | 8 | 9 | class ClickHouseSensorTestCase(unittest.TestCase): 10 | def test_arguments(self): 11 | is_success_mock = mock.Mock() 12 | is_failure_mock = mock.Mock(return_value=False) 13 | execute_mock: mock.Mock = self._hook_cls_mock.return_value.execute 14 | return_value = ClickHouseSensor( 15 | task_id='test1', # required by Airflow 16 | sql='SELECT 1', 17 | parameters=[('test-param', 1)], 18 | with_column_types=True, 19 | external_tables=[{'name': 'ext'}], 20 | query_id='test-query-id', 21 | settings={'test-setting': 1}, 22 | types_check=True, 23 | columnar=True, 24 | clickhouse_conn_id='test-conn-id', 25 | database='test-database', 26 | is_success=is_success_mock, 27 | is_failure=is_failure_mock, 28 | ).poke(context={}) 29 | with self.subTest('ClickHouseHook.__init__'): 30 | self._hook_cls_mock.assert_called_once_with( 31 | clickhouse_conn_id='test-conn-id', 32 | database='test-database', 33 | ) 34 | with self.subTest('ClickHouseHook.execute'): 35 | execute_mock.assert_called_once_with( 36 | 'SELECT 1', 37 | [('test-param', 1)], 38 | True, 39 | [{'name': 'ext'}], 40 | 'test-query-id', 41 | {'test-setting': 1}, 42 | True, 43 | True, 44 | ) 45 | with self.subTest('is_failure'): 46 | is_failure_mock.assert_called_once_with(execute_mock.return_value) 47 | with self.subTest('return value'): 48 | is_success_mock.assert_called_once_with(execute_mock.return_value) 49 | self.assertIs(return_value, is_success_mock.return_value) 50 | 51 | def test_defaults(self): 52 | # side_effect is for bool_mock to operate as real bool during __init__ 53 | bool_mock = mock.Mock(side_effect=bool) 54 | with mock.patch('builtins.bool', bool_mock): 55 | operator = ClickHouseSensor( 56 | task_id='test2', # required by Airflow 57 | sql='SELECT 2', 58 | ) 59 | bool_mock.side_effect = None 60 | return_value = operator.poke(context={}) 61 | with self.subTest('ClickHouseHook.__init__'): 62 | self._hook_cls_mock.assert_called_once_with( 63 | 
clickhouse_conn_id='clickhouse_default', 64 | database=None, 65 | ) 66 | with self.subTest('ClickHouseHook.execute'): 67 | self._hook_cls_mock.return_value.execute.assert_called_once_with( 68 | 'SELECT 2', 69 | None, 70 | False, 71 | None, 72 | None, 73 | None, 74 | False, 75 | False, 76 | ) 77 | with self.subTest('is_success is bool'): 78 | self.assertIs(return_value, bool_mock.return_value) 79 | 80 | def test_failure(self): 81 | is_failure_mock = mock.Mock(return_value=True) 82 | with self.assertRaisesRegex(AirflowException, 'is_failure returned True'): 83 | ClickHouseSensor( 84 | task_id='test3', # required by Airflow 85 | sql='SELECT 3', 86 | is_failure=is_failure_mock, 87 | ).poke(context={}) 88 | 89 | def setUp(self): 90 | self._hook_cls_patcher = mock.patch('.'.join(( 91 | 'airflow_clickhouse_plugin.operators', 92 | 'clickhouse.ClickHouseHook', 93 | ))) 94 | self._hook_cls_mock = self._hook_cls_patcher.start() 95 | 96 | def tearDown(self): 97 | self._hook_cls_patcher.stop() 98 | 99 | 100 | class ClickHouseSensorClassTestCase(unittest.TestCase): 101 | def test_template_fields(self): 102 | self.assertSetEqual( 103 | { 104 | '_sql', 105 | '_parameters', 106 | '_external_tables', 107 | '_query_id', 108 | '_settings', 109 | '_database', 110 | }, 111 | frozenset(ClickHouseSensor.template_fields), 112 | ) 113 | 114 | 115 | if __name__ == '__main__': 116 | unittest.main() 117 | -------------------------------------------------------------------------------- /src/airflow_clickhouse_plugin/hooks/clickhouse.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import typing as t 3 | from itertools import islice 4 | 5 | import clickhouse_driver 6 | from airflow.hooks.base import BaseHook 7 | from airflow.models import Connection 8 | 9 | # annotated according to clickhouse_driver.Client.execute comments 10 | _ParamT = t.NewType('_ParamT', t.Union[list, tuple, dict]) 11 | ExecuteParamsT = t.NewType( 12 | 'ExecuteParamsT', 13 | t.Union[ 14 | # INSERT queries 15 | t.List[_ParamT], t.Tuple[_ParamT, ...], t.Generator[_ParamT, None, None], 16 | # SELECT queries 17 | dict, 18 | ], 19 | ) 20 | ExecuteReturnT = t.NewType( 21 | # clickhouse_driver.Client.execute return type 22 | 'ExecuteReturnT', 23 | t.Union[ 24 | int, # number of inserted rows 25 | t.List[tuple], # list of tuples with rows/columns 26 | t.Tuple[t.List[tuple], t.List[t.Tuple[str, str]]], # with_column_types 27 | ], 28 | ) 29 | 30 | 31 | class ExternalTable(t.TypedDict): 32 | name: str 33 | structure: t.List[t.Tuple[str, str]] 34 | data: t.List[t.Dict[str, t.Any]] 35 | 36 | 37 | default_conn_name = 'clickhouse_default' 38 | 39 | 40 | class ClickHouseHook(BaseHook): 41 | def __init__( 42 | self, 43 | *args, 44 | clickhouse_conn_id: str = default_conn_name, 45 | database: t.Optional[str] = None, 46 | **kwargs, 47 | ): 48 | super().__init__(*args, **kwargs) 49 | self._clickhouse_conn_id = clickhouse_conn_id 50 | self._database = database 51 | 52 | def get_conn(self) -> clickhouse_driver.Client: 53 | conn = self.get_connection(self._clickhouse_conn_id) 54 | return clickhouse_driver.Client(**conn_to_kwargs(conn, self._database)) 55 | 56 | def execute( 57 | self, 58 | sql: t.Union[str, t.Iterable[str]], 59 | # arguments of clickhouse_driver.Client.execute 60 | params: t.Optional[ExecuteParamsT] = None, 61 | with_column_types: bool = False, 62 | external_tables: t.Optional[t.List[ExternalTable]] = None, 63 | query_id: t.Optional[str] = None, 64 | settings: t.Dict[str, t.Any] = None, 65 | 
types_check: bool = False, 66 | columnar: bool = False, 67 | ) -> ExecuteReturnT: 68 | """ 69 | Passes arguments to ``clickhouse_driver.Client.execute``. 70 | 71 | Allows executing multiple queries if the ``sql`` argument is an 72 | iterable. Returns the result of the last query's execution. 73 | """ 74 | if isinstance(sql, str): 75 | sql = (sql,) 76 | with _disconnecting(self.get_conn()) as conn: 77 | last_result = None 78 | for query in sql: 79 | self.log.info(_format_query_log(query, params)) 80 | last_result = conn.execute( 81 | query, 82 | params=params, 83 | with_column_types=with_column_types, 84 | external_tables=external_tables, 85 | query_id=query_id, 86 | settings=settings, 87 | types_check=types_check, 88 | columnar=columnar, 89 | ) 90 | return last_result 91 | 92 | 93 | def conn_to_kwargs(conn: Connection, database: t.Optional[str]) -> t.Dict[str, t.Any]: 94 | """ Translate an Airflow Connection to clickhouse-driver Connection kwargs. """ 95 | connection_kwargs = conn.extra_dejson.copy() 96 | # Connection attributes can be parsed to empty strings by urllib.unparse 97 | connection_kwargs['host'] = conn.host or 'localhost' 98 | if conn.port: 99 | connection_kwargs.update(port=conn.port) 100 | if conn.login: 101 | connection_kwargs.update(user=conn.login) 102 | if conn.password: 103 | connection_kwargs.update(password=conn.password) 104 | if database is not None: 105 | connection_kwargs.update(database=database) 106 | elif conn.schema: 107 | connection_kwargs.update(database=conn.schema) 108 | return connection_kwargs 109 | 110 | 111 | def _format_query_log(query: str, params: ExecuteParamsT) -> str: 112 | return ''.join((query, f' with {_format_params(params)}' if params else '')) 113 | 114 | 115 | def _format_params(params: ExecuteParamsT, limit: int = 10) -> str: 116 | if isinstance(params, t.Generator) or len(params) <= limit: 117 | return str(params) 118 | if isinstance(params, dict): 119 | head = dict(islice(params.items(), limit)) 120 | else: 121 | head = params[:limit] 122 | head_str = str(head) 123 | closing_paren = head_str[-1] 124 | return f'{head_str[:-1]} … and {len(params) - limit} ' \ 125 | f'more parameters{closing_paren}' 126 | 127 | 128 | _DisconnectingT = t.TypeVar('_DisconnectingT') 129 | 130 | 131 | @contextlib.contextmanager 132 | def _disconnecting(thing: _DisconnectingT) -> t.ContextManager[_DisconnectingT]: 133 | """ 134 | Context to automatically disconnect something at the end of a block. 135 | 136 | Similar to ``contextlib.closing`` but calls the .disconnect() method on exit. 137 | 138 | Code like this: 139 | 140 | >>> with _disconnecting(<module>.open(<arguments>)) as f: 141 | >>>     <block> 142 | 143 | is equivalent to this: 144 | 145 | >>> f = <module>.open(<arguments>) 146 | >>> try: 147 | >>>     <block> 148 | >>> finally: 149 | >>>     f.disconnect() 150 | """ 151 | try: 152 | yield thing 153 | finally: 154 | thing.disconnect() 155 | -------------------------------------------------------------------------------- /.github/workflows/publish-to-pypi.yml: -------------------------------------------------------------------------------- 1 | name: Publish to PyPI 2 | 3 | on: 4 | push: 5 | # Pattern matched against refs/tags 6 | tags: 7 | - "**" # Push events to every tag including tags with slashes 8 | 9 | # How to publish a new version: 10 | # 1. Update all hardcoded versions in this file (automation TBD): the plugin's, Airflow's, and Python's. 11 | # Pick the latest Airflow and Python versions supported by the plugin. 12 | # 2. Publish a tag with a name equal to the plugin's current version and a "v" prefix: e.g. v1.0.0. 
13 | 14 | jobs: 15 | run-local-tests: 16 | name: Test locally on ClickHouse 17 | runs-on: ubuntu-latest 18 | services: 19 | clickhouse: 20 | image: clickhouse/clickhouse-server 21 | ports: 22 | - 9000/tcp 23 | options: >- 24 | --env CLICKHOUSE_SKIP_USER_SETUP=1 25 | steps: 26 | - uses: actions/checkout@v6 27 | - name: Set up Python 28 | uses: actions/setup-python@v6 29 | with: 30 | python-version: "3.12" 31 | - name: Install dependencies 32 | run: | 33 | python -m pip install --upgrade pip 34 | python -m pip install \ 35 | -r requirements.txt \ 36 | --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-3.1.0/constraints-3.12.txt" 37 | - name: Run tests on ClickHouse server 38 | env: 39 | AIRFLOW_CONN_CLICKHOUSE_DEFAULT: "clickhouse://localhost:${{ job.services.clickhouse.ports['9000'] }}" 40 | PYTHONPATH: "${{ github.workspace }}/src" # for tests to import src/airflow_clickhouse_plugin 41 | run: | 42 | cd tests 43 | python -m unittest 44 | 45 | build: 46 | name: Build distribution 📦 47 | runs-on: ubuntu-latest 48 | steps: 49 | - uses: actions/checkout@v6 50 | - name: Set up Python 51 | uses: actions/setup-python@v6 52 | with: 53 | python-version: "3.x" 54 | - name: Install pypa/build 55 | run: >- 56 | python3 -m 57 | pip install 58 | build 59 | --user 60 | - name: Build a binary wheel and a source tarball 61 | run: python3 -m build 62 | - name: Store the distribution packages 63 | uses: actions/upload-artifact@v6 64 | with: 65 | name: python-package-distributions 66 | path: dist/ 67 | 68 | publish-to-testpypi: 69 | name: Publish to TestPyPI 70 | needs: 71 | - build 72 | - run-local-tests 73 | runs-on: ubuntu-latest 74 | environment: 75 | name: testpypi 76 | url: https://test.pypi.org/p/airflow-clickhouse-plugin 77 | permissions: 78 | id-token: write # IMPORTANT: mandatory for trusted publishing 79 | steps: 80 | - name: Download distribution packages 81 | uses: actions/download-artifact@v7 82 | with: 83 | name: python-package-distributions 84 | path: dist/ 85 | - name: Publish to TestPyPI 86 | uses: pypa/gh-action-pypi-publish@release/v1 87 | with: 88 | repository-url: https://test.pypi.org/legacy/ 89 | 90 | run-testpypi-tests: 91 | name: Test TestPyPI on ClickHouse 92 | runs-on: ubuntu-latest 93 | needs: 94 | - publish-to-testpypi 95 | services: 96 | clickhouse: 97 | image: clickhouse/clickhouse-server 98 | ports: 99 | - 9000/tcp 100 | options: >- 101 | --env CLICKHOUSE_SKIP_USER_SETUP=1 102 | steps: 103 | - uses: actions/checkout@v6 104 | - uses: actions/setup-python@v6 105 | with: 106 | python-version: "3.12" 107 | - name: Install airflow-clickhouse-plugin from TestPyPI 108 | run: | 109 | python -m pip install --upgrade pip 110 | python -m pip install \ 111 | --index-url https://test.pypi.org/simple \ 112 | --extra-index-url https://pypi.org/simple \ 113 | airflow-clickhouse-plugin[common.sql]==1.6.0 \ 114 | --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-3.1.0/constraints-3.12.txt" 115 | - name: Run tests on ClickHouse server 116 | env: 117 | AIRFLOW_CONN_CLICKHOUSE_DEFAULT: "clickhouse://localhost:${{ job.services.clickhouse.ports['9000'] }}" 118 | # "src" is not added to PYTHONPATH to run tests using pip-installed version 119 | run: | 120 | cd tests 121 | python -m unittest 122 | 123 | publish-to-pypi: 124 | name: Publish to PyPI 125 | needs: 126 | - build 127 | - run-testpypi-tests 128 | runs-on: ubuntu-latest 129 | environment: 130 | name: pypi 131 | url: https://pypi.org/p/airflow-clickhouse-plugin 132 | permissions: 133 | id-token: write # IMPORTANT: mandatory for trusted publishing 134 | steps: 135 | - name: Download distribution packages 136 | uses: actions/download-artifact@v7 137 | with: 138 | name: python-package-distributions 139 | path: dist/ 140 | - name: Publish to PyPI 141 | uses: pypa/gh-action-pypi-publish@release/v1 142 | 143 | run-pypi-tests: 144 | name: Test PyPI on ClickHouse 145 | runs-on: ubuntu-latest 146 | needs: 147 | - publish-to-pypi 148 | services: 149 | clickhouse: 150 | image: clickhouse/clickhouse-server 151 | ports: 152 | - 9000/tcp 153 | options: >- 154 | --env CLICKHOUSE_SKIP_USER_SETUP=1 155 | steps: 156 | - uses: actions/checkout@v6 157 | - uses: actions/setup-python@v6 158 | with: 159 | python-version: "3.12" 160 | - name: Install airflow-clickhouse-plugin from PyPI 161 | run: | 162 | python -m pip install --upgrade pip 163 | python -m pip install \ 164 | airflow-clickhouse-plugin[common.sql]==1.6.0 \ 165 | --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-3.1.0/constraints-3.12.txt" 166 | - name: Run tests on ClickHouse server 167 | env: 168 | AIRFLOW_CONN_CLICKHOUSE_DEFAULT: "clickhouse://localhost:${{ job.services.clickhouse.ports['9000'] }}" 169 | # "src" is not added to PYTHONPATH to run tests using pip-installed version 170 | run: | 171 | cd tests 172 | python -m unittest 173 | 174 | upload-to-github-release: 175 | name: Sign and upload to GitHub Release 176 | needs: 177 | - publish-to-pypi 178 | runs-on: ubuntu-latest 179 | permissions: 180 | contents: write # IMPORTANT: mandatory for making GitHub Releases 181 | id-token: write # IMPORTANT: mandatory for sigstore 182 | steps: 183 | - name: Download distribution packages 184 | uses: actions/download-artifact@v7 185 | with: 186 | name: python-package-distributions 187 | path: dist/ 188 | - name: Sign with Sigstore 189 | uses: sigstore/gh-action-sigstore-python@v3.2.0 190 | with: 191 | inputs: >- 192 | ./dist/*.tar.gz 193 | ./dist/*.whl 194 | - name: Upload to GitHub Release 195 | env: 196 | GITHUB_TOKEN: ${{ github.token }} 197 | # Upload to GitHub Release using the `gh` CLI. 198 | # `dist/` contains the built packages, and the 199 | # sigstore-produced signatures and certificates. 
200 | run: >- 201 | gh release upload 202 | '${{ github.ref_name }}' dist/** 203 | --repo '${{ github.repository }}' 204 | -------------------------------------------------------------------------------- /tests/unit/hooks/test_clickhouse.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from unittest import mock 3 | 4 | from airflow.models import Connection 5 | 6 | from airflow_clickhouse_plugin.hooks.clickhouse import ClickHouseHook, \ 7 | _format_query_log 8 | 9 | 10 | class ClickHouseHookTestCase(unittest.TestCase): 11 | def test_arguments(self): 12 | queries = ['SELECT 1', 'SELECT 2'] 13 | client_instance_mock = self._client_cls_mock.return_value 14 | client_instance_mock.execute.side_effect = [1, 2] 15 | self._get_connection_mock.return_value = Connection( 16 | conn_id='test-conn-id', 17 | host='test-host', 18 | port=1234, 19 | login='test-login', 20 | password='test-pass', 21 | schema='test-schema', 22 | extra='{"test_extra": "test-extra-value"}', 23 | ) 24 | 25 | return_value = ClickHouseHook( 26 | clickhouse_conn_id='test-conn-id', 27 | database='test-database', 28 | ).execute( 29 | sql=queries, 30 | params=[('test-param', 1)], 31 | with_column_types=True, 32 | external_tables=[{'name': 'ext'}], # type: ignore 33 | query_id='test-query-id', 34 | settings={'test-setting': 1}, 35 | types_check=True, 36 | columnar=True, 37 | ) 38 | 39 | with self.subTest('connection id'): 40 | self._get_connection_mock.assert_called_once_with('test-conn-id') 41 | 42 | with self.subTest('Client.__init__'): 43 | self._client_cls_mock.assert_called_once_with( 44 | host='test-host', 45 | port=1234, 46 | user='test-login', 47 | password='test-pass', 48 | database='test-database', 49 | test_extra='test-extra-value', 50 | ) 51 | 52 | for query, mock_call \ 53 | in zip(queries, client_instance_mock.execute.mock_calls): 54 | with self.subTest(f'Client.execute {query}'): 55 | self.assertEqual( 56 | mock.call( 57 | query, 58 | params=[('test-param', 1)], 59 | with_column_types=True, 60 | external_tables=[{'name': 'ext'}], 61 | query_id='test-query-id', 62 | settings={'test-setting': 1}, 63 | types_check=True, 64 | columnar=True, 65 | ), 66 | mock_call, 67 | ) 68 | 69 | with self.subTest('Client.disconnect'): 70 | client_instance_mock.disconnect.assert_called_once_with() 71 | 72 | with self.subTest('return value'): 73 | self.assertEqual(2, return_value) 74 | 75 | def test_defaults(self): 76 | client_instance_mock = self._client_cls_mock.return_value 77 | client_instance_mock.execute.return_value = 'test-return-value' 78 | self._get_connection_mock.return_value = Connection() 79 | 80 | return_value = ClickHouseHook().execute('SELECT 1') 81 | 82 | with self.subTest('connection id'): 83 | self._get_connection_mock \ 84 | .assert_called_once_with('clickhouse_default') 85 | 86 | with self.subTest('Client.__init__'): 87 | self._client_cls_mock.assert_called_once_with(host='localhost') 88 | 89 | with self.subTest('Client.execute'): 90 | client_instance_mock.execute.assert_called_once_with( 91 | 'SELECT 1', 92 | params=None, 93 | with_column_types=False, 94 | external_tables=None, 95 | query_id=None, 96 | settings=None, 97 | types_check=False, 98 | columnar=False, 99 | ) 100 | 101 | with self.subTest('Client.disconnect'): 102 | client_instance_mock.disconnect.assert_called_once_with() 103 | 104 | with self.subTest('return value'): 105 | self.assertEqual('test-return-value', return_value) 106 | 107 | def setUp(self): 108 | self._client_cls_patcher = 
mock.patch('clickhouse_driver.Client') 109 | self._client_cls_mock = self._client_cls_patcher.start() 110 | self._get_connection_patcher = \ 111 | mock.patch.object(ClickHouseHook, 'get_connection') 112 | self._get_connection_mock = self._get_connection_patcher.start() 113 | 114 | def tearDown(self): 115 | self._client_cls_patcher.stop() 116 | self._get_connection_patcher.stop() 117 | 118 | 119 | class ClickHouseHookLoggingTestCase(unittest.TestCase): 120 | def test(self): 121 | test_generator = (_ for _ in range(1)) 122 | subtests = ( 123 | # params=None 124 | ( 125 | 'SELECT 1', None, 126 | 'SELECT 1', 127 | ), 128 | # params: list 129 | ( 130 | 'INSERT INTO test2 VALUES', [], 131 | 'INSERT INTO test2 VALUES', 132 | ), 133 | ( 134 | 'INSERT INTO test3 VALUES', [3], 135 | 'INSERT INTO test3 VALUES with [3]', 136 | ), 137 | ( 138 | 'INSERT INTO test4 VALUES', [val for val in range(11)], 139 | ''.join(( 140 | 'INSERT INTO test4 VALUES with [', 141 | ', '.join(map(str, range(10))), 142 | ' … and 1 more parameters]', 143 | )), 144 | ), 145 | # params: tuple 146 | ( 147 | 'INSERT INTO test5 VALUES', (), 148 | 'INSERT INTO test5 VALUES', 149 | ), 150 | ( 151 | 'INSERT INTO test6 VALUES', (6,), 152 | 'INSERT INTO test6 VALUES with (6,)', 153 | ), 154 | ( 155 | 'INSERT INTO test7 VALUES', tuple(val for val in range(11)), 156 | ''.join(( 157 | 'INSERT INTO test7 VALUES with (', 158 | ', '.join(map(str, range(10))), 159 | ' … and 1 more parameters)', 160 | )), 161 | ), 162 | # params: dict 163 | ( 164 | 'SELECT 8', {}, 165 | 'SELECT 8', 166 | ), 167 | ( 168 | 'SELECT %(param)s', {'param': 9}, 169 | "SELECT %(param)s with {'param': 9}", 170 | ), 171 | ( 172 | 'SELECT 10', {k: k for k in range(11)}, 173 | ''.join(( 174 | 'SELECT 10 with {', 175 | ', '.join(f'{key}: {key}' for key in range(10)), 176 | ' … and 1 more parameters}', 177 | )), 178 | ), 179 | # params: Generator 180 | ( 181 | 'INSERT INTO test11 VALUES', test_generator, 182 | f'INSERT INTO test11 VALUES with {test_generator}', 183 | ), 184 | ) 185 | for query, params, expected in subtests: 186 | with self.subTest((query, params)): 187 | self.assertEqual(expected, _format_query_log(query, params)) 188 | 189 | 190 | if __name__ == '__main__': 191 | unittest.main() 192 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Unit & Integration Tests 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | paths: 7 | - "src/**" 8 | - "tests/**" 9 | - ".github/workflows/tests.yml" 10 | - "requirements.txt" 11 | - "pyproject.toml" 12 | pull_request: 13 | branches: [ "master" ] 14 | paths: 15 | - "src/**" 16 | - "tests/**" 17 | - ".github/workflows/tests.yml" 18 | - "requirements.txt" 19 | - "pyproject.toml" 20 | 21 | env: 22 | PYTHONPATH: "src" # for tests to import src/airflow_clickhouse_plugin 23 | 24 | jobs: 25 | 26 | unit-tests: 27 | runs-on: ubuntu-latest 28 | strategy: 29 | matrix: 30 | python-version: ["3.9", "3.10", "3.11", "3.12"] 31 | airflow-version: ["2.0.2", "2.1.4", "2.2.5", "2.3.4", "2.4.3", "2.5.3", "2.6.3", "2.7.1", "2.8.1", "2.9.0", "2.10.0", "3.0.0", "3.1.0"] 32 | airflow-extras: ["", "[common.sql]"] 33 | exclude: 34 | # constraints files for these combinations are missing 35 | - python-version: "3.10" 36 | airflow-version: "2.0.2" 37 | - python-version: "3.10" 38 | airflow-version: "2.1.4" 39 | - python-version: "3.10" 40 | airflow-version: "2.2.5" 41 | - python-version: "3.11" 42 | 
airflow-version: "2.0.2" 43 | - python-version: "3.11" 44 | airflow-version: "2.1.4" 45 | - python-version: "3.11" 46 | airflow-version: "2.2.5" 47 | - python-version: "3.11" 48 | airflow-version: "2.3.4" 49 | - python-version: "3.11" 50 | airflow-version: "2.4.3" 51 | - python-version: "3.11" 52 | airflow-version: "2.5.3" 53 | - python-version: "3.12" 54 | airflow-version: "2.0.2" 55 | - python-version: "3.12" 56 | airflow-version: "2.1.4" 57 | - python-version: "3.12" 58 | airflow-version: "2.2.5" 59 | - python-version: "3.12" 60 | airflow-version: "2.3.4" 61 | - python-version: "3.12" 62 | airflow-version: "2.4.3" 63 | - python-version: "3.12" 64 | airflow-version: "2.5.3" 65 | - python-version: "3.12" 66 | airflow-version: "2.6.3" 67 | - python-version: "3.12" 68 | airflow-version: "2.7.1" 69 | - python-version: "3.12" 70 | airflow-version: "2.8.1" 71 | - python-version: "3.9" 72 | airflow-version: "3.1.0" 73 | # common.sql constraint for these Airflow versions is <1.3.0 74 | # => misses SQLExecuteQueryOperator 75 | - airflow-extras: "[common.sql]" 76 | airflow-version: "2.0.2" 77 | - airflow-extras: "[common.sql]" 78 | airflow-version: "2.1.4" 79 | - airflow-extras: "[common.sql]" 80 | airflow-version: "2.2.5" 81 | - airflow-extras: "[common.sql]" 82 | airflow-version: "2.3.4" 83 | - airflow-extras: "[common.sql]" 84 | airflow-version: "2.4.3" 85 | include: 86 | - airflow-extras: "[common.sql]" 87 | tests-pattern: "" 88 | - airflow-extras: "" 89 | tests-pattern: "-p test_clickhouse.py" 90 | steps: 91 | - uses: actions/checkout@v6 92 | - name: Set up Python ${{ matrix.python-version }} 93 | uses: actions/setup-python@v6 94 | with: 95 | python-version: ${{ matrix.python-version }} 96 | - name: Install dependencies 97 | run: | 98 | python -m pip install --upgrade pip 99 | python -m pip install \ 100 | -r requirements.txt \ 101 | apache-airflow${{ matrix.airflow-extras }}==${{ matrix.airflow-version }} \ 102 | --ignore-installed \ 103 | --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-${{ matrix.airflow-version }}/constraints-${{ matrix.python-version }}.txt" 104 | - name: Run unit tests 105 | run: | 106 | python -m unittest discover -t tests -s unit ${{ matrix.tests-pattern }} 107 | 108 | integration-tests: 109 | runs-on: ubuntu-latest 110 | strategy: 111 | matrix: 112 | python-version: ["3.9", "3.10", "3.11", "3.12"] 113 | airflow-version: ["2.0.2", "2.1.4", "2.2.5", "2.3.4", "2.4.3", "2.5.3", "2.6.3", "2.7.1", "2.8.1", "2.9.0", "2.10.0", "3.0.0"] 114 | airflow-extras: ["", "[common.sql]"] 115 | exclude: 116 | # constraints files for these combinations are missing 117 | - python-version: "3.10" 118 | airflow-version: "2.0.2" 119 | - python-version: "3.10" 120 | airflow-version: "2.1.4" 121 | - python-version: "3.10" 122 | airflow-version: "2.2.5" 123 | - python-version: "3.11" 124 | airflow-version: "2.0.2" 125 | - python-version: "3.11" 126 | airflow-version: "2.1.4" 127 | - python-version: "3.11" 128 | airflow-version: "2.2.5" 129 | - python-version: "3.11" 130 | airflow-version: "2.3.4" 131 | - python-version: "3.11" 132 | airflow-version: "2.4.3" 133 | - python-version: "3.11" 134 | airflow-version: "2.5.3" 135 | - python-version: "3.12" 136 | airflow-version: "2.0.2" 137 | - python-version: "3.12" 138 | airflow-version: "2.1.4" 139 | - python-version: "3.12" 140 | airflow-version: "2.2.5" 141 | - python-version: "3.12" 142 | airflow-version: "2.3.4" 143 | - python-version: "3.12" 144 | airflow-version: "2.4.3" 145 | - python-version: "3.12" 146 | 
airflow-version: "2.5.3" 147 | - python-version: "3.12" 148 | airflow-version: "2.6.3" 149 | - python-version: "3.12" 150 | airflow-version: "2.7.1" 151 | - python-version: "3.12" 152 | airflow-version: "2.8.1" 153 | - python-version: "3.9" 154 | airflow-version: "3.1.0" 155 | # common.sql constraint for these Airflow versions is <1.3.0 156 | # => misses SQLExecuteQueryOperator 157 | - airflow-extras: "[common.sql]" 158 | airflow-version: "2.0.2" 159 | - airflow-extras: "[common.sql]" 160 | airflow-version: "2.1.4" 161 | - airflow-extras: "[common.sql]" 162 | airflow-version: "2.2.5" 163 | - airflow-extras: "[common.sql]" 164 | airflow-version: "2.3.4" 165 | - airflow-extras: "[common.sql]" 166 | airflow-version: "2.4.3" 167 | include: 168 | - airflow-extras: "[common.sql]" 169 | tests-pattern: "" 170 | - airflow-extras: "" 171 | tests-pattern: "-p test_clickhouse.py" 172 | services: 173 | clickhouse: 174 | image: clickhouse/clickhouse-server 175 | ports: 176 | - 9000/tcp 177 | options: >- 178 | --env CLICKHOUSE_SKIP_USER_SETUP=1 179 | steps: 180 | - uses: actions/checkout@v6 181 | - name: Set up Python ${{ matrix.python-version }} 182 | uses: actions/setup-python@v6 183 | with: 184 | python-version: ${{ matrix.python-version }} 185 | - name: Install dependencies 186 | run: | 187 | python -m pip install --upgrade pip 188 | python -m pip install \ 189 | -r requirements.txt \ 190 | apache-airflow${{ matrix.airflow-extras }}==${{ matrix.airflow-version }} \ 191 | --ignore-installed \ 192 | --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-${{ matrix.airflow-version }}/constraints-${{ matrix.python-version }}.txt" 193 | - name: Run tests on ClickHouse server 194 | env: 195 | AIRFLOW_CONN_CLICKHOUSE_DEFAULT: "clickhouse://localhost:${{ job.services.clickhouse.ports['9000'] }}" 196 | run: | 197 | python -m unittest discover -t tests -s integration ${{ matrix.tests-pattern }} 198 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Airflow ClickHouse Plugin 2 | 3 | ![PyPI - Downloads](https://img.shields.io/pypi/dm/airflow-clickhouse-plugin) 4 | ![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/bryzgaloff/airflow-clickhouse-plugin/tests.yml?branch=master) 5 | ![GitHub contributors](https://img.shields.io/github/contributors/bryzgaloff/airflow-clickhouse-plugin?color=blue) 6 | 7 | 🔝 The most popular [Apache Airflow][airflow] plugin for ClickHouse, ranked in the top 1% of downloads [on PyPI](https://pypi.org/project/airflow-clickhouse-plugin/). Based on awesome [mymarilyn/clickhouse-driver][ch-driver]. 8 | 9 | This plugin provides two families of operators: richer [`clickhouse_driver.Client.execute`-based](#clickhouse-driver-family) and standardized [compatible with Python DB API 2.0](#python-db-api-20-family). 10 | 11 | Both operators' families are fully supported and covered with tests for different versions of Airflow and Python. 12 | 13 | ## `clickhouse-driver` family 14 | 15 | - `ClickHouseOperator` 16 | - `ClickHouseHook` 17 | - `ClickHouseSensor` 18 | 19 | These operators are based on [mymarilyn/clickhouse-driver][ch-driver]'s `Client.execute` method and arguments. They offer a full functionality of `clickhouse-driver` and are recommended if you are starting fresh with ClickHouse in Airflow. 20 | 21 | ### Features 22 | 23 | - **SQL Templating**: SQL queries and other parameters are templated. 
24 | - **Multiple SQL Queries**: run multiple SQL queries within a single `ClickHouseOperator`. The result of the last query is pushed to XCom (configurable by `do_xcom_push`). 25 | - **Logging**: executed queries are logged in a visually pleasing format, making it easier to track and debug. 26 | - **Efficient Native ClickHouse Protocol**: utilizes the efficient _native_ ClickHouse TCP protocol, thanks to [clickhouse-driver][ch-driver-docs]. **Does not support HTTP protocol.** 27 | - **Custom Connection Parameters**: supports additional ClickHouse [connection parameters][ch-driver-connection], such as various timeouts, `compression`, and `secure`, through the Airflow [Connection.extra][airflow-conn-extra] property. 28 | 29 | See the reference and examples [below](#usage). 30 | 31 | ### Installation and dependencies 32 | 33 | `pip install -U airflow-clickhouse-plugin` 34 | 35 | Dependencies: only `apache-airflow` and `clickhouse-driver`. 36 | 37 | ## Python DB API 2.0 family 38 | 39 | - Operators: 40 | - `ClickHouseSQLExecuteQueryOperator` 41 | - `ClickHouseSQLColumnCheckOperator` 42 | - `ClickHouseSQLTableCheckOperator` 43 | - `ClickHouseSQLCheckOperator` 44 | - `ClickHouseSQLValueCheckOperator` 45 | - `ClickHouseSQLIntervalCheckOperator` 46 | - `ClickHouseSQLThresholdCheckOperator` 47 | - `ClickHouseBranchSQLOperator` 48 | - `ClickHouseDbApiHook` 49 | - `ClickHouseSqlSensor` 50 | 51 | These operators combine [`clickhouse_driver.dbapi`][ch-driver-db-api] with [apache-airflow-providers-common-sql]. While they have limited functionality compared to `Client.execute` (not all arguments are supported), they provide a standardized interface. This is useful when porting Airflow pipelines to ClickHouse from another SQL provider backed by the `common.sql` Airflow package, such as MySQL, Postgres, BigQuery, and others. 52 | 53 | The feature set of this family is fully based on the `common.sql` Airflow provider: refer to its [reference][common-sql-reference] and [examples][common-sql-examples] for details. 54 | 55 | An example is also available [below](#db-api-20-clickhousesqlsensor-and-clickhousesqlexecutequeryoperator-example). 56 | 57 | ### Installation and dependencies 58 | 59 | Add the `common.sql` extra when installing the plugin to enable the DB API 2.0 operators: `pip install -U airflow-clickhouse-plugin[common.sql]`. 60 | 61 | Dependencies: `apache-airflow-providers-common-sql` (usually pre-packed with Airflow) in addition to `apache-airflow` and `clickhouse-driver`. 62 | 63 | ## Python and Airflow versions support 64 | 65 | Different versions of the plugin support different combinations of Python and Airflow versions. We _primarily_ support **Airflow 2.0+ and Python 3.8+**.
If you need to use the plugin with older Python-Airflow combinations, pick a suitable plugin version: 66 | 67 | | airflow-clickhouse-plugin version | Airflow version | Python version | 68 | |-----------------------------------|-------------------------|--------------------| 69 | | 1.6.0 | \>=2.0.0,<3.2.0 | ~=3.10 | 70 | | 1.5.0 | \>=2.0.0,<3.1.0 | ~=3.9 | 71 | | 1.4.0 | \>=2.0.0,<2.11.0 | ~=3.8 | 72 | | 1.3.0 | \>=2.0.0,<2.10.0 | ~=3.8 | 73 | | 1.2.0 | \>=2.0.0,<2.9.0 | ~=3.8 | 74 | | 1.1.0 | \>=2.0.0,<2.8.0 | ~=3.8 | 75 | | 1.0.0 | \>=2.0.0,<2.7.0 | ~=3.8 | 76 | | 0.11.0 | ~=2.0.0,\>=2.2.0,<2.7.0 | ~=3.7 | 77 | | 0.10.0,0.10.1 | ~=2.0.0,\>=2.2.0,<2.6.0 | ~=3.7 | 78 | | 0.9.0,0.9.1 | ~=2.0.0,\>=2.2.0,<2.5.0 | ~=3.7 | 79 | | 0.8.2 | \>=2.0.0,<2.4.0 | ~=3.7 | 80 | | 0.8.0,0.8.1 | \>=2.0.0,<2.3.0 | ~=3.6 | 81 | | 0.7.0 | \>=2.0.0,<2.2.0 | ~=3.6 | 82 | | 0.6.0 | ~=2.0.1 | ~=3.6 | 83 | | \>=0.5.4,<0.6.0 | ~=1.10.6 | \>=2.7 or >=3.5.\* | 84 | | \>=0.5.0,<0.5.4 | ==1.10.6 | \>=2.7 or >=3.5.\* | 85 | 86 | `~=` means a compatible release, see [PEP 440][pep-440-compatible-releases] for an explanation. 87 | 88 | [DB API 2.0 functionality](#python-db-api-20-family) requires `apache-airflow>2.9.3` (strictly greater since versions up to 2.9.3 had an MRO-related bug, see [#87](https://github.com/bryzgaloff/airflow-clickhouse-plugin/issues/87)) and `apache-airflow-providers-common-sql>=1.3`: earlier versions are not supported. 89 | 90 | Previous versions of the plugin might require the `pandas` extra: `pip install airflow-clickhouse-plugin[pandas]==0.11.0`. Check out earlier versions of `README.md` for details. 91 | 92 | # Usage 93 | 94 | To see examples [scroll down](#examples). To run them, [create an Airflow connection to ClickHouse](#how-to-create-an-airflow-connection-to-clickhouse). 95 | 96 | ## ClickHouseOperator reference 97 | 98 | To import `ClickHouseOperator` use `from airflow_clickhouse_plugin.operators.clickhouse import ClickHouseOperator`. 99 | 100 | Supported arguments: 101 | * `sql` (templated, required): a query (if the argument is a single `str`) or multiple queries (an iterable of `str`). Supports files with the `.sql` extension. 102 | * `clickhouse_conn_id`: Airflow connection id. The connection schema is described [below](#clickhouse-connection-schema). The default connection id is `clickhouse_default`. 103 | * Arguments of the [`clickhouse_driver.Client.execute` method][ch-driver-execute-summary]: 104 | * `parameters` (templated): passed as `params` to the `execute` method. (Renamed to avoid a name conflict with Airflow tasks' `params` argument.) 105 | * `dict` for `SELECT` queries. 106 | * `list`/`tuple`/generator for `INSERT` queries. 107 | * If multiple queries are provided via `sql` then the `parameters` are passed to _all_ of them. 108 | * `with_column_types` (not templated). 109 | * `external_tables` (templated). 110 | * `query_id` (templated). 111 | * `settings` (templated). 112 | * `types_check` (not templated). 113 | * `columnar` (not templated). 114 | * For the documentation of these arguments, refer to the [`clickhouse_driver.Client.execute` API reference][ch-driver-execute-reference]. 115 | * `database` (templated): if present, overrides the `schema` attribute of the Airflow connection. 116 | * Other arguments (including a required `task_id`) are inherited from Airflow [BaseOperator][airflow-base-op]. 117 | 118 | The result of the _last_ query is pushed to XCom (disable with the `do_xcom_push=False` argument). 119 | 120 | In other words, the operator simply wraps the [`ClickHouseHook.execute` method](#clickhousehook-reference).
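For instance, here is a minimal hedged sketch of a one-task DAG (the `events` table, the query, and the Airflow 2.4+ style `schedule` argument are illustrative assumptions, not part of the plugin):

```python
from datetime import datetime

from airflow import DAG
from airflow_clickhouse_plugin.operators.clickhouse import ClickHouseOperator

with DAG(
    dag_id='clickhouse_operator_sketch',
    start_date=datetime(2024, 1, 1),
    schedule=None,  # run on manual trigger only
):
    # `sql` and `parameters` are templated: {{ ds }} is rendered at runtime.
    # The result of the last (here, the only) query is pushed to XCom.
    ClickHouseOperator(
        task_id='count_events',
        sql='SELECT count() FROM events WHERE date = %(date)s',
        parameters={'date': '{{ ds }}'},
        clickhouse_conn_id='clickhouse_default',
    )
```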

See [example](#clickhouseoperator-example) below.

## ClickHouseHook reference

To import `ClickHouseHook`, use: `from airflow_clickhouse_plugin.hooks.clickhouse import ClickHouseHook`.

Supported constructor (`__init__` method) kwargs:
* `clickhouse_conn_id`: Airflow connection id. The connection schema is described [below](#clickhouse-connection-schema). The default connection id is `clickhouse_default`.
* `database`: if present, overrides the `schema` attribute of the Airflow connection.

The hook defines an `execute` method which simply wraps [`clickhouse_driver.Client.execute`][ch-driver-execute-reference]. It accepts all the same arguments, except for:
* `sql` (instead of `execute`'s `query`): a query (if the argument is a single `str`) or multiple queries (an iterable of `str`).

`ClickHouseHook.execute` returns the result of the _last_ query.

The hook also defines a `get_conn()` method which returns the underlying [`clickhouse_driver.Client`][ch-driver-client] instance.

See [example](#clickhousehook-example) below.

## ClickHouseSensor reference

To import `ClickHouseSensor`, use: `from airflow_clickhouse_plugin.sensors.clickhouse import ClickHouseSensor`.

This class wraps the [`ClickHouseHook.execute` method](#clickhousehook-reference) in an [Airflow sensor][airflow-sensor]. It supports all the arguments of [`ClickHouseOperator`](#clickhouseoperator-reference) and additionally:
* `is_success`: a callable which accepts a single argument, the return value of `ClickHouseHook.execute`. If `is_success` returns a truthy value, the sensor succeeds. By default, the callable is `bool`: i.e. the sensor succeeds if the return value of `ClickHouseHook.execute` is truthy. Since `execute` usually returns a list of records, the default check is falsy when no records are returned.
* `is_failure`: a callable which accepts a single argument, the return value of `ClickHouseHook.execute`. If `is_failure` returns a truthy value, the sensor raises an `AirflowException`. By default, `is_failure` is `None` and no failure check is performed.

See [example](#clickhousesensor-example) below.

## How to create an Airflow connection to ClickHouse

As the `type` of the new connection, choose **SQLite** or any other SQL database. There is **no** dedicated ClickHouse connection type yet, so any SQL type is used as the closest fit.

All the connection attributes are optional: the default host is `localhost` and the other credentials [have defaults](#default-values) defined by `clickhouse-driver`. If you use non-default values, set them according to the [connection schema](#clickhouse-connection-schema).

If you use a secure connection to ClickHouse (which requires additional configuration on the ClickHouse side), set `extra` to `{"secure":true}`. All `extra` connection parameters are passed to [`clickhouse_driver.Client`][ch-driver-client] as-is.
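
For illustration, an Airflow connection with host `ch.example.com`, port `9440`, login `login`, schema `default`, and `extra='{"secure": true, "connect_timeout": 5}'` results in roughly the following client construction (all values here are hypothetical):

```python
import clickhouse_driver

# roughly what the plugin builds from such a connection: standard
# attributes map to named arguments, extra properties are merged in as-is
client = clickhouse_driver.Client(
    host='ch.example.com',
    port=9440,
    user='login',
    password='***',
    database='default',
    secure=True,        # from extra
    connect_timeout=5,  # from extra
)
```
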

### ClickHouse connection schema

[`clickhouse_driver.Client`][ch-driver-client] is initialized with the attributes of the Airflow [Connection][airflow-connection-howto]:

| Airflow Connection attribute | `Client.__init__` argument |
|------------------------------|----------------------------|
| `host` | `host` |
| `port` (`int`) | `port` |
| `schema` | `database` |
| `login` | `user` |
| `password` | `password` |
| `extra` | `**kwargs` |

The `database` argument of `ClickHouseOperator`, `ClickHouseHook`, `ClickHouseSensor`, and others overrides the `schema` attribute of the Airflow connection.

### Extra arguments

You may set non-standard arguments of [`clickhouse_driver.Client`][ch-driver-client], such as timeouts, `compression`, and `secure`, using Airflow's [`Connection.extra`][airflow-conn-extra] attribute. The attribute should contain a JSON object which is [deserialized][airflow-conn-dejson]; all of its properties are passed as-is to the `Client`.

For example, if the Airflow connection contains `extra='{"secure": true}'`, then `Client.__init__` receives the `secure=True` keyword argument in addition to the other connection attributes.

#### Compression

Compression requires additional packages to be installed. For example, for lz4:

```bash
pip3 install clickhouse-cityhash lz4
```

Then set the `compression` parameter in the Airflow connection's `extra`: `extra='{"compression":"lz4"}'`. See the [official clickhouse-driver documentation][ch-driver-pypi-install] for more information on compression options.

A connection URI with compression looks like `clickhouse://login:password@host:port/?compression=lz4`.

See the [official documentation][airflow-connection-howto] to learn more about managing connections in Airflow.

### Default values

If an Airflow connection attribute is not set, it is not passed to `clickhouse_driver.Client`. In such cases, the default value of the corresponding [`clickhouse_driver.Connection`][ch-driver-connection] argument applies. For instance, `user` defaults to `'default'`.

This means that the plugin itself does not define any default values for the ClickHouse connection. You may fully rely on the default values of the [clickhouse-driver][ch-driver] version you use.

The only exception is `host`: if the attribute of the Airflow connection is not set, `'localhost'` is used.

### Default connection

By default, the plugin uses the Airflow connection with id `'clickhouse_default'`.
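
To use a different connection, pass its id to the hook, operator, or sensor explicitly; a minimal sketch (`clickhouse_staging` is a hypothetical connection id):

```python
from airflow_clickhouse_plugin.hooks.clickhouse import ClickHouseHook

# uses the 'clickhouse_default' Airflow connection
default_hook = ClickHouseHook()
# uses a custom Airflow connection instead
staging_hook = ClickHouseHook(clickhouse_conn_id='clickhouse_staging')
```
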

## Examples

### ClickHouseOperator example

```python
from airflow import DAG
from airflow_clickhouse_plugin.operators.clickhouse import ClickHouseOperator
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago

with DAG(
        dag_id='update_income_aggregate',
        start_date=days_ago(2),
) as dag:
    ClickHouseOperator(
        task_id='update_income_aggregate',
        database='default',
        sql=(
            '''
                INSERT INTO aggregate
                SELECT eventDt, sum(price * qty) AS income FROM sales
                WHERE eventDt = '{{ ds }}' GROUP BY eventDt
            ''', '''
                OPTIMIZE TABLE aggregate ON CLUSTER {{ var.value.cluster_name }}
                PARTITION toDate('{{ execution_date.strftime('%Y-%m-01') }}')
            ''', '''
                SELECT sum(income) FROM aggregate
                WHERE eventDt BETWEEN
                    '{{ execution_date.start_of('month').to_date_string() }}'
                    AND '{{ execution_date.end_of('month').to_date_string() }}'
            ''',
            # result of the last query is pushed to XCom
        ),
        # query_id is templated: it lets you quickly identify the query in ClickHouse logs
        query_id='{{ ti.dag_id }}-{{ ti.task_id }}-{{ ti.run_id }}-{{ ti.try_number }}',
        clickhouse_conn_id='clickhouse_test',
    ) >> PythonOperator(
        task_id='print_month_income',
        python_callable=lambda task_instance:
            # pulling the XCom value and printing it
            print(task_instance.xcom_pull(task_ids='update_income_aggregate')),
    )
```

### ClickHouseHook example

```python
from airflow import DAG
from airflow_clickhouse_plugin.hooks.clickhouse import ClickHouseHook
from airflow.providers.sqlite.hooks.sqlite import SqliteHook
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago


def sqlite_to_clickhouse():
    sqlite_hook = SqliteHook()
    ch_hook = ClickHouseHook()
    records = sqlite_hook.get_records('SELECT * FROM some_sqlite_table')
    ch_hook.execute('INSERT INTO some_ch_table VALUES', records)


with DAG(
        dag_id='sqlite_to_clickhouse',
        start_date=days_ago(2),
) as dag:
    PythonOperator(
        task_id='sqlite_to_clickhouse',
        python_callable=sqlite_to_clickhouse,
    )
```

Important note: do not try to insert values using the `ch_hook.execute('INSERT INTO some_ch_table VALUES (1)')` literal form. [`clickhouse-driver` requires][ch-driver-insert] values for an `INSERT` query to be provided via `parameters` due to specifics of the native ClickHouse protocol.
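
A minimal sketch of the two forms, using the same hypothetical table as above:

```python
# does NOT work: the native protocol does not accept literal VALUES
# ch_hook.execute('INSERT INTO some_ch_table VALUES (1)')

# works: rows are passed separately via the parameters argument
# (assuming some_ch_table has a single numeric column)
ch_hook.execute('INSERT INTO some_ch_table VALUES', [(1,), (2,)])
```
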

### ClickHouseSensor example

```python
from airflow import DAG
from airflow_clickhouse_plugin.sensors.clickhouse import ClickHouseSensor
from airflow_clickhouse_plugin.operators.clickhouse import ClickHouseOperator
from airflow.utils.dates import days_ago


with DAG(
        dag_id='listen_warnings',
        start_date=days_ago(2),
) as dag:
    ClickHouseSensor(
        task_id='poke_events_count',
        database='monitor',
        sql="SELECT count() FROM warnings WHERE eventDate = '{{ ds }}'",
        # is_success receives the full execute result, a list of records like [(cnt,)]
        is_success=lambda result: result[0][0] > 10000,
    ) >> ClickHouseOperator(
        task_id='create_alert',
        database='alerts',
        sql='''
            INSERT INTO events SELECT eventDate, count()
            FROM monitor.warnings WHERE eventDate = '{{ ds }}'
        ''',
    )
```

### DB API 2.0: ClickHouseSqlSensor and ClickHouseSQLExecuteQueryOperator example

```python
from airflow import DAG
from airflow_clickhouse_plugin.sensors.clickhouse_dbapi import ClickHouseSqlSensor
from airflow_clickhouse_plugin.operators.clickhouse_dbapi import ClickHouseSQLExecuteQueryOperator
from airflow.utils.dates import days_ago


with DAG(
        dag_id='listen_warnings',
        start_date=days_ago(2),
) as dag:
    ClickHouseSqlSensor(
        task_id='poke_events_count',
        hook_params=dict(schema='monitor'),
        sql="SELECT count() FROM warnings WHERE eventDate = '{{ ds }}'",
        # common.sql SqlSensor passes the first cell of the first record
        success=lambda cnt: cnt > 10000,
        conn_id=None,  # required by common.sql SqlSensor; None selects the default connection
    ) >> ClickHouseSQLExecuteQueryOperator(
        task_id='create_alert',
        database='alerts',
        sql='''
            INSERT INTO events SELECT eventDate, count()
            FROM monitor.warnings WHERE eventDate = '{{ ds }}'
        ''',
    )
```

# How to run tests

Unit tests: `python3 -m unittest discover -t tests -s unit`

Integration tests require access to a ClickHouse server. Here is how to set up a local test environment using Docker:
* Run a ClickHouse server in a local Docker container: `docker run -p 9000:9000 --ulimit nofile=262144:262144 -it clickhouse/clickhouse-server`
* Run the tests with Airflow connection details set [via an environment variable][airflow-conn-env]: `PYTHONPATH=src AIRFLOW_CONN_CLICKHOUSE_DEFAULT=clickhouse://localhost python3 -m unittest discover -t tests -s integration`
* Stop the container after running the tests to deallocate its resources.

Run all (unit and integration) tests with a ClickHouse connection defined: `PYTHONPATH=src AIRFLOW_CONN_CLICKHOUSE_DEFAULT=clickhouse://localhost python3 -m unittest discover -s tests`

## GitHub Actions

A [GitHub Actions workflow][github-action-src] is configured for this project.

## Run all tests inside Docker

Start a ClickHouse server inside Docker: `docker exec -it $(docker run --rm -d clickhouse/clickhouse-server) bash`

The command above starts a detached server container and opens `bash` inside it.

Install the dependencies into the container and run the tests (execute inside the container):

```bash
apt-get update
apt-get install -y python3 python3-pip git make
git clone https://github.com/whisklabs/airflow-clickhouse-plugin.git
cd airflow-clickhouse-plugin
python3 -m pip install -r requirements.txt
PYTHONPATH=src AIRFLOW_CONN_CLICKHOUSE_DEFAULT=clickhouse://localhost python3 -m unittest discover -s tests
```

Stop the container afterwards.

# Contributors

* Created by Anton Bryzgalov, [@bryzgaloff](https://github.com/bryzgaloff), originally at [Whisk, Samsung](https://github.com/whisklabs)
* Inspired by Viktor Taranenko, [@viktortnk](https://github.com/viktortnk) (Whisk, Samsung)

Community contributors:

* Danila Ganchar, [@d-ganchar](https://github.com/d-ganchar)
* Mikhail, [@glader](https://github.com/glader)
* Alexander Chashnikov, [@ne1r0n](https://github.com/ne1r0n)
* Simone Brundu, [@saimon46](https://github.com/saimon46)
* [@gkarg](https://github.com/gkarg)
* Stanislav Morozov, [@r3b-fish](https://github.com/r3b-fish)
* Sergey Bychkov, [@SergeyBychkov](https://github.com/SergeyBychkov)
* [@was-av](https://github.com/was-av)
* Maxim Tarasov, [@MaximTar](https://github.com/MaximTar)
* [@dvnrvn](https://github.com/dvnrvn)
* Giovanni Corsetti, [@CorsettiS](https://github.com/CorsettiS)
* Dmytro Zhyzniev, [@1ng4lipt](https://github.com/1ng4lipt)
* Anton Bezdenezhnykh, [@GaMeRaM](https://github.com/GaMeRaM)
* Andrey, [@bobelev](https://github.com/bobelev)
* Misha Epikhin, [@epikhinm](https://github.com/epikhinm)
* igor, [@cra](https://github.com/cra)
* Yuriy Natarov, [@Acuion](https://github.com/Acuion)
* Daniil Parmon, [@dparmon](https://github.com/dparmon)
* Behzod Mansurov, [@star-tek-mb](https://github.com/star-tek-mb)


[airflow]: https://airflow.apache.org/
[ch-driver]: https://github.com/mymarilyn/clickhouse-driver
[ch-driver-docs]: https://clickhouse-driver.readthedocs.io/en/latest/
[ch-driver-execute-summary]: https://clickhouse-driver.readthedocs.io/en/latest/quickstart.html#selecting-data
[ch-driver-execute-reference]: https://clickhouse-driver.readthedocs.io/en/latest/api.html#clickhouse_driver.Client.execute
[airflow-base-op]: https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/models/baseoperator/index.html
[ch-driver-insert]: https://clickhouse-driver.readthedocs.io/en/latest/quickstart.html#inserting-data
[ch-driver-client]: https://clickhouse-driver.readthedocs.io/en/latest/api.html#client
[ch-driver-connection]: https://clickhouse-driver.readthedocs.io/en/latest/api.html#connection
[airflow-conn-extra]: https://airflow.apache.org/docs/2.1.0/_api/airflow/models/connection/index.html#airflow.models.connection.Connection.extra
[airflow-connection-howto]: https://airflow.apache.org/docs/apache-airflow/stable/howto/connection.html
[airflow-conn-dejson]: https://airflow.apache.org/docs/apache-airflow/2.1.0/_api/airflow/models/index.html?highlight=connection#airflow.models.Connection.extra_dejson
[airflow-conn-env]: https://airflow.apache.org/docs/apache-airflow/2.1.0/howto/connection.html#storing-a-connection-in-environment-variables
[github-action-src]: https://github.com/whisklabs/airflow-clickhouse-plugin/tree/master/.github/workflows
[pep-440-compatible-releases]: https://peps.python.org/pep-0440/#compatible-release
[apache-airflow-providers-common-sql]: https://airflow.apache.org/docs/apache-airflow-providers-common-sql/stable/index.html
[db-api-pep]: https://peps.python.org/pep-0249/
[airflow-sensor]: https://airflow.apache.org/docs/apache-airflow/stable/core-concepts/sensors.html
[ch-driver-pypi-install]: https://clickhouse-driver.readthedocs.io/en/latest/installation.html#installation-pypi
[common-sql-reference]: https://airflow.apache.org/docs/apache-airflow-providers-common-sql/stable/_api/airflow/providers/common/sql/index.html
[common-sql-examples]: https://airflow.apache.org/docs/apache-airflow-providers-common-sql/stable/operators.html
[ch-driver-db-api]: https://clickhouse-driver.readthedocs.io/en/latest/dbapi.html