├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── __init__.py
├── clickhouse_migrate
│   ├── __init__.py
│   └── migrate.py
├── requirements.txt
├── setup.py
└── tests
    ├── clickhouse_migrations
    │   └── V1__create_test.sql
    ├── migrations_mixed
    │   ├── V1__create_test.sql
    │   └── V2_create_multiple_tables.json
    ├── migrations_seq
    │   ├── V1_sequential_dmls.json
    │   └── test.csv.gz
    └── test_clickhouse_migration.py

/.gitignore:
--------------------------------------------------------------------------------

# Created by https://www.toptal.com/developers/gitignore/api/python,intellij
# Edit at https://www.toptal.com/developers/gitignore?templates=python,intellij

### Intellij ###
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
.idea/
# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf

# Generated files
.idea/**/contentModel.xml

# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml

# Gradle
.idea/**/gradle.xml
.idea/**/libraries

# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr

# CMake
cmake-build-*/

# Mongo Explorer plugin
.idea/**/mongoSettings.xml

# File-based project format
*.iws

# IntelliJ
out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Cursive Clojure plugin
.idea/replstate.xml

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

# Editor-based Rest Client
.idea/httpRequests

# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser

### Intellij Patch ###
# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721

# *.iml
# modules.xml
# .idea/misc.xml
# *.ipr

# Sonarlint plugin
# https://plugins.jetbrains.com/plugin/7973-sonarlint
.idea/**/sonarlint/

# SonarQube Plugin
# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin
.idea/**/sonarIssues.xml

# Markdown Navigator plugin
# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced
.idea/**/markdown-navigator.xml
.idea/**/markdown-navigator-enh.xml
.idea/**/markdown-navigator/

# Cache file creation bug
# See https://youtrack.jetbrains.com/issue/JBR-2257
.idea/$CACHE_FILE$

# CodeStream plugin
# https://plugins.jetbrains.com/plugin/12206-codestream
.idea/codestream.xml

### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
pytestdebug.log

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/
doc/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# End of https://www.toptal.com/developers/gitignore/api/python,intellij
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
language: python
python:
  - "3.6"
  - "3.7"
  - "3.8"
  - "3.9"
services:
  - clickhouse
install:
  - pip install -r requirements.txt
script:
  - pytest
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2021 Delium Technologies

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
## Clickhouse Migrator

[![Build Status](https://app.travis-ci.com/delium/clickhouse-migrator.svg?branch=master)](https://app.travis-ci.com/delium/clickhouse-migrator)
[![Maintainability](https://api.codeclimate.com/v1/badges/0f3bdcfd7fbf643a0e7b/maintainability)](https://codeclimate.com/github/delium/clickhouse-migrator/maintainability)
[![Test Coverage](https://api.codeclimate.com/v1/badges/0f3bdcfd7fbf643a0e7b/test_coverage)](https://codeclimate.com/github/delium/clickhouse-migrator/test_coverage)

[Clickhouse](https://clickhouse.tech/) is known for its ability to store and query very large datasets at scale.

Developing and maintaining a large-scale database system usually involves a constant stream of changes to the actual schema.
Putting off the scripts that apply those changes quickly becomes painful, which is the problem versioned migrations solve.
We found no existing migration tool for Clickhouse, so we developed one, inspired by [Flyway](https://flywaydb.org/) and [Alembic](https://alembic.sqlalchemy.org/en/latest/).

This is a Python library: you can run it as a pre-deployment hook with the system Python, or call it as a migration step before deployment or server startup inside your application, as required.


### Publishing to pypi
* `python -m build`
* `python -m twine upload --verbose --repository pypi dist/*`


### Installation

You can install from pypi using `pip install clickhouse-migrator`.

### Usage

```python
# <= v1.0.4
from migration_lib.migrate import migrate

migrate(db_name, migrations_home, db_host, db_user, db_password, create_db_if_no_exists)
```

```python
# > v1.0.4
from clickhouse_migrate.migrate import migrate

migrate(db_name, migrations_home, db_host, db_user, db_password, create_db_if_no_exists)
```

Parameter | Description | Default
-------|-------------|---------
db_name | Clickhouse database name | None
migrations_home | Path to the directory that holds the migration files |
db_host | Clickhouse database hostname | localhost
db_user | Clickhouse database user |
db_password | Clickhouse database password | ****
create_db_if_no_exists | If `db_name` is not present, enabling this will create the database | True
db_port | Database port, in case your server runs on a non-default port | None (defaults to 9000)
queue_exec | Command pipelining (wait for any system mutations to complete) after every command | True

### Folder and Migration file patterns

The file naming convention is very close to the one `flyway` uses.

Your first version's filename should be prefixed with `V1__` (double underscore).
Migrations are executed one by one in version order; if one fails, execution stops and no further version files are run.

#### Multi statement and single statement migrations

If your migration is a single statement, create a file with the `.sql` extension in the migrations folder and put the statement in there.

If you want to execute more than one statement in a migration, use a json file instead. When using a json file, the contents must be a valid json array of statements, as shown below. Keep migrations logical: it is not good practice to push all migrations into one json file, nor is it always wise to keep every statement in its own file.

```json
[
    "CREATE TABLE pytest.sample1(id UInt32, name String) ENGINE MergeTree PARTITION BY tuple() ORDER BY tuple()",
    "CREATE TABLE pytest.sample2(id UInt32, name String) ENGINE MergeTree PARTITION BY tuple() ORDER BY tuple()",
    "CREATE TABLE pytest.sample3(id UInt32, name String) ENGINE MergeTree PARTITION BY tuple() ORDER BY tuple()"
]
```
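
For completeness, here is a minimal end-to-end sketch of calling the library from a deployment hook. The database name, folder, and credentials below are hypothetical placeholders; any reachable Clickhouse native endpoint will do.

```python
# Pre-deployment hook sketch: "analytics" and "./migrations" are placeholders.
from clickhouse_migrate.migrate import migrate

migrate(
    db_name="analytics",             # database to migrate (created if missing)
    migrations_home="./migrations",  # folder holding V1__*.sql / V2__*.json files
    db_host="localhost",
    db_user="default",
    db_password="",
    db_port=9000,                    # optional, for non-default ports
    create_db_if_no_exists=True,
    queue_exec=True,                 # wait for mutations spawned by each statement
)
```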
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/delium/clickhouse-migrator/e7050b20fa2a751fbd600a1f07ab0474a7b6a6b7/__init__.py
--------------------------------------------------------------------------------
/clickhouse_migrate/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/delium/clickhouse-migrator/e7050b20fa2a751fbd600a1f07ab0474a7b6a6b7/clickhouse_migrate/__init__.py
--------------------------------------------------------------------------------
/clickhouse_migrate/migrate.py:
--------------------------------------------------------------------------------
import time
import hashlib
import os
import pathlib
import json
import datetime

import pandas as pd
from clickhouse_driver import Client


def execute_and_inflate(client, query):
    # with_column_types=True makes clickhouse_driver return (rows, column_types).
    rows, columns = client.execute(query, with_column_types=True)
    column_names = [c[0] for c in columns]
    return pd.DataFrame([dict(zip(column_names, row)) for row in rows])


def get_connection(db_name, db_host, db_user, db_password, db_port=None):
    return Client(db_host, port=db_port, user=db_user, password=db_password, database=db_name)


def init_db(client, db_name):
    # Bookkeeping table: one row per committed migration.
    client.execute("CREATE TABLE IF NOT EXISTS schema_versions (version UInt32, md5 String, script String, created_at DateTime DEFAULT now()) ENGINE = MergeTree ORDER BY tuple(created_at)")


def migrations_to_apply(client, incoming):
    # Compare the incoming migrations (a DataFrame of version/script/md5) against
    # the versions already committed to schema_versions.
    current_versions = execute_and_inflate(client, "SELECT version AS version, script AS c_script, md5 AS c_md5 FROM schema_versions")
    if current_versions.empty:
        return incoming
    if len(incoming) == 0 or len(incoming) < len(current_versions):
        raise AssertionError("Migrations have gone missing: your code base should not truncate migrations, use new migrations to correct older ones")
    current_versions = current_versions.astype({'version': 'int32'})
    incoming = incoming.astype({'version': 'int32'})
    execution_stat = pd.merge(current_versions, incoming, on='version', how='outer')
    # Committed versions whose files no longer exist on disk.
    committed_and_absconded = execution_stat[execution_stat.c_md5.notnull() & execution_stat.md5.isnull()]
    if len(committed_and_absconded) > 0:
        raise AssertionError("Migrations have gone missing: your code base should not truncate migrations, use new migrations to correct older ones")
    # Committed versions whose files were edited after they were applied.
    terms_violated = execution_stat[execution_stat.c_md5.notnull() & execution_stat.md5.notnull() & ~(execution_stat.md5 == execution_stat.c_md5)]
    if len(terms_violated) > 0:
        raise AssertionError("Do not edit migrations once they have run, use new migrations to correct older ones")
    # Whatever has no committed md5 yet is still pending.
    return execution_stat[execution_stat.c_md5.isnull()][['version', 'script', 'md5']]
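
# Illustration (hypothetical data) of the bookkeeping merge above. Suppose
# schema_versions holds version 1 and the migrations folder holds V1 and V2:
#
#   committed (schema_versions)        incoming (files on disk)
#   version  c_script    c_md5         version  script       md5
#   1        V1__a.sql   "aaa"         1        V1__a.sql    "aaa"
#                                      2        V2__b.json   "bbb"
#
# After pd.merge(..., on='version', how='outer'):
#   version 1 -> c_md5 "aaa", md5 "aaa"   already applied and untouched: skipped
#   version 2 -> c_md5 NaN,  md5 "bbb"    pending: returned to apply_migration
#
# A committed row with no matching file raises "gone missing"; a committed row
# whose md5 no longer matches the file raises "do not edit migrations".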

def apply_migration(client, migrations, db_name, queue_exec=True):
    if migrations.empty:
        return
    # Apply strictly in version order; a failure stops the run.
    migrations = migrations.sort_values('version')
    for _, row in migrations.iterrows():
        with open(row['script']) as f:
            # A .json script is an array of statements, a .sql script is a single statement.
            migration_scripts = json.load(f) if row['script'].endswith('.json') else [f.read()]
        for migration_script in migration_scripts:
            pipelined(client, migration_script, db_name) if queue_exec else client.execute(migration_script)
        print(f"INSERT INTO schema_versions(version, script, md5) VALUES({row['version']}, '{row['script']}', '{row['md5']}')")
        client.execute("INSERT INTO schema_versions(version, script, md5) VALUES", [{'version': row['version'], 'script': row['script'], 'md5': row['md5']}])


def pipelined(client, migration_script, db_name, timeout=60 * 60):
    # Run the statement, then block until any mutations it spawned are done:
    # ALTER ... UPDATE/DELETE statements are asynchronous in Clickhouse.
    ct = datetime.datetime.now()
    current_time = ct.strftime("%Y-%m-%d %H:%M:%S")
    client.execute(migration_script)
    while True:
        loop_time = datetime.datetime.now()
        if (loop_time - ct).total_seconds() >= timeout:
            raise Exception(f"Transaction Timeout - Unable to complete in {timeout} seconds, migration -> {migration_script}")
        mutations_to_inspect = execute_and_inflate(client, f"SELECT database, table, mutation_id, lower(command) AS command FROM system.mutations WHERE database='{db_name}' AND create_time >= '{current_time}' AND is_done=0")
        if mutations_to_inspect.empty:
            break
        # Only wait on mutations whose command text appears in this migration;
        # compare case-insensitively since the command was lowercased above.
        mutations_to_inspect['match'] = mutations_to_inspect.apply(lambda row: row['command'] in migration_script.lower(), axis=1)
        mutations_to_inspect = mutations_to_inspect[mutations_to_inspect['match']]
        if mutations_to_inspect.empty:
            break
        time.sleep(5)


def create_db(db_name, db_host, db_user, db_password, db_port=None):
    client = Client(db_host, port=db_port, user=db_user, password=db_password)
    client.execute(f"CREATE DATABASE IF NOT EXISTS {db_name}")
    client.disconnect()


def migrate(db_name, migrations_home, db_host, db_user, db_password, db_port=None, create_db_if_no_exists=True, queue_exec=True):
    if create_db_if_no_exists:
        create_db(db_name, db_host, db_user, db_password, db_port=db_port)
    client = get_connection(db_name, db_host, db_user, db_password, db_port=db_port)
    init_db(client, db_name)
    # The version is parsed from the filename prefix (V1__create_test.sql -> 1) and the
    # md5 of the file contents guards against edits after a migration has been committed.
    migrations = [{"version": int(f.name.split('_')[0].replace('V', '')),
                   "script": f"{migrations_home}/{f.name}",
                   "md5": hashlib.md5(pathlib.Path(f"{migrations_home}/{f.name}").read_bytes()).hexdigest()}
                  for f in os.scandir(f"{migrations_home}") if f.name.endswith('.sql') or f.name.endswith('.json')]
    apply_migration(client, migrations_to_apply(client, pd.DataFrame(migrations)), db_name, queue_exec=queue_exec)
    client.disconnect()
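
The mutation wait in `pipelined` can also be reproduced in isolation. Below is a standalone sketch (not part of the library; `wait_for_mutations` is a hypothetical helper) of polling `system.mutations` until asynchronous ALTERs in a database settle:

```python
import time

from clickhouse_driver import Client


def wait_for_mutations(client, db_name, poll_seconds=5, timeout=3600):
    # Block until no unfinished mutations remain in db_name, or give up.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        pending = client.execute(
            "SELECT count() FROM system.mutations WHERE database = %(db)s AND is_done = 0",
            {'db': db_name},
        )[0][0]
        if pending == 0:
            return
        time.sleep(poll_seconds)
    raise TimeoutError(f"mutations in {db_name} did not finish within {timeout}s")


# Example usage (assumes the pytest database from the test suite below):
# client = Client('localhost', user='default', password='')
# client.execute("ALTER TABLE pytest.sample UPDATE enabled = 0 WHERE name > 3000")
# wait_for_mutations(client, 'pytest')
```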
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
pandas==0.25.2
clickhouse-driver==0.1.2
pytest==5.1.1
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
from setuptools import find_packages, setup

from os import path

this_directory = path.abspath(path.dirname(__file__))
with open(path.join(this_directory, 'README.md'), encoding='utf-8') as f:
    long_description = f.read()

setup(name='clickhouse-migrator',
      packages=find_packages(),
      version='1.0.6',
      description='Migration library for Clickhouse',
      author='Delium Engineering',
      install_requires=['pandas', 'clickhouse_driver'],
      long_description=long_description,
      long_description_content_type='text/markdown',
      url='https://github.com/delium/clickhouse-migrator',
      author_email='oss@delium.co',
      tests_require=['pytest==5.1.1'],
      test_suite='tests',
      license='MIT',
      license_file='LICENSE',
      python_requires='>=3.6')
--------------------------------------------------------------------------------
/tests/clickhouse_migrations/V1__create_test.sql:
--------------------------------------------------------------------------------
CREATE TABLE pytest.sample(id UInt32, name String) ENGINE MergeTree PARTITION BY tuple()
ORDER BY tuple()
--------------------------------------------------------------------------------
/tests/migrations_mixed/V1__create_test.sql:
--------------------------------------------------------------------------------
CREATE TABLE pytest.sample(id UInt32, name String) ENGINE MergeTree PARTITION BY tuple() ORDER BY tuple()
--------------------------------------------------------------------------------
/tests/migrations_mixed/V2_create_multiple_tables.json:
--------------------------------------------------------------------------------
[
    "CREATE TABLE pytest.sample1(id UInt32, name String) ENGINE MergeTree PARTITION BY tuple() ORDER BY tuple()",
    "CREATE TABLE pytest.sample2(id UInt32, name String) ENGINE MergeTree PARTITION BY tuple() ORDER BY tuple()",
    "CREATE TABLE pytest.sample3(id UInt32, name String) ENGINE MergeTree PARTITION BY tuple() ORDER BY tuple()"
]
--------------------------------------------------------------------------------
/tests/migrations_seq/V1_sequential_dmls.json:
--------------------------------------------------------------------------------
[
    "ALTER TABLE pytest.sample ADD COLUMN enabled UInt32 DEFAULT 1",
    "ALTER TABLE pytest.sample ADD COLUMN guard UInt32 DEFAULT -1",
    "ALTER TABLE pytest.sample UPDATE enabled=0 WHERE name > 3000",
    "ALTER TABLE pytest.sample UPDATE guard=0 WHERE enabled = 0",
    "ALTER TABLE pytest.sample UPDATE guard=1 WHERE enabled = 1"
]
--------------------------------------------------------------------------------
/tests/migrations_seq/test.csv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/delium/clickhouse-migrator/e7050b20fa2a751fbd600a1f07ab0474a7b6a6b7/tests/migrations_seq/test.csv.gz
--------------------------------------------------------------------------------
/tests/test_clickhouse_migration.py:
--------------------------------------------------------------------------------
import pytest
from clickhouse_migrate.migrate import migrate, get_connection, migrations_to_apply, init_db, execute_and_inflate
import pandas as pd
import os


@pytest.fixture
def client():
    client = get_connection('default', 'localhost', 'default', '')
    client.execute('CREATE DATABASE IF NOT EXISTS pytest')
    client.disconnect()
    return get_connection('pytest', 'localhost', 'default', '')


@pytest.fixture(autouse=True)
def before(client):
    client.execute('DROP TABLE IF EXISTS schema_versions')
    init_db(client, 'pytest')


def clean_slate(client):
    client.execute('DROP DATABASE pytest')
    client.execute('CREATE DATABASE pytest')
    init_db(client, 'pytest')


def test_should_compute_no_migrations_to_run(client):
    incoming = pd.DataFrame([])
    results = migrations_to_apply(client, incoming)
    assert results.size == 0


def test_should_raise_exception_on_deleted_migrations_no_incoming(client):
    incoming = pd.DataFrame([])
    client.execute('INSERT INTO schema_versions(version, script, md5) VALUES', [{'version': 1, 'script': 'location_to_script', 'md5': '1234'}])
    with pytest.raises(AssertionError):
        migrations_to_apply(client, incoming)


def test_should_raise_exceptions_on_missing_migration(client):
    incoming = pd.DataFrame([{'version': 2, 'script': 'location_to_script', 'md5': '12345'}])
    client.execute('INSERT INTO schema_versions(version, script, md5) VALUES', [{'version': 1, 'script': 'location_to_script', 'md5': '1234'}])
    with pytest.raises(AssertionError):
        migrations_to_apply(client, incoming)


def test_should_raise_exceptions_on_modified_post_committed_migrations(client):
    incoming = pd.DataFrame([{'version': 1, 'script': 'location_to_script', 'md5': '12345'}])
    client.execute('INSERT INTO schema_versions(version, script, md5) VALUES', [{'version': 1, 'script': 'location_to_script', 'md5': '1234'}])
    with pytest.raises(AssertionError):
        migrations_to_apply(client, incoming)


def test_should_return_migrations_to_run(client):
    incoming = pd.DataFrame([{'version': 1, 'script': 'location_to_script', 'md5': '1234'}, {'version': 2, 'script': 'location_to_script_2', 'md5': '1234'}])
    client.execute('INSERT INTO schema_versions(version, script, md5) VALUES', [{'version': 1, 'script': 'location_to_script', 'md5': '1234'}])
    results = migrations_to_apply(client, incoming)
    assert len(results) == 1
    assert results.version.values[0] == 2


def test_should_migrate_empty_database(client):
    client = get_connection('pytest', 'localhost', 'default', '')
    clean_slate(client)
    tables = execute_and_inflate(client, 'show tables')
    assert len(tables) == 1
    assert tables.name.values[0] == 'schema_versions'
    migrate('pytest', 'tests/clickhouse_migrations', 'localhost', 'default', '')
    tables = execute_and_inflate(client, 'show tables')
    assert len(tables) == 2
    assert tables.name.values[0] == 'sample'
    assert tables.name.values[1] == 'schema_versions'
    client.disconnect()

def test_should_migrate_using_sql_and_json_migrations(client):
    client = get_connection('pytest', 'localhost', 'default', '')
    clean_slate(client)
    tables = execute_and_inflate(client, 'show tables')
    assert len(tables) == 1
    assert tables.name.values[0] == 'schema_versions'
    migrate('pytest', 'tests/migrations_mixed', 'localhost', 'default', '')
    tables = execute_and_inflate(client, 'show tables')
    assert len(tables) == 5
    assert tables.name.values[0] == 'sample'
    assert tables.name.values[1] == 'sample1'
    assert tables.name.values[2] == 'sample2'
    assert tables.name.values[3] == 'sample3'
    assert tables.name.values[4] == 'schema_versions'
    client.disconnect()

def test_should_migrate_in_queue_when_enabled(client):
    client = get_connection('pytest', 'localhost', 'default', '')
    clean_slate(client)
    client.execute('CREATE TABLE sample(id UInt32, name UInt32) ENGINE MergeTree PARTITION BY tuple() ORDER BY tuple()')

    tables = execute_and_inflate(client, 'show tables')
    assert len(tables) == 2
    assert tables.name.values[0] == 'sample'
    assert tables.name.values[1] == 'schema_versions'

    os.system('gunzip < "tests/migrations_seq/test.csv.gz" | clickhouse-client --query="INSERT INTO pytest.sample FORMAT CSVWithNames"')
    total_rows = 100000
    assert execute_and_inflate(client, 'SELECT COUNT(*) AS nrow FROM pytest.sample').nrow.values[0] == total_rows
    enabled0 = execute_and_inflate(client, "SELECT COUNT(*) AS nrow FROM pytest.sample WHERE name > 3000").nrow.values[0]

    migrate('pytest', 'tests/migrations_seq', 'localhost', 'default', '', queue_exec=True)

    assert execute_and_inflate(client, 'SELECT COUNT(*) AS nrow FROM pytest.sample WHERE enabled = 0').nrow.values[0] == enabled0
    assert execute_and_inflate(client, 'SELECT COUNT(*) AS nrow FROM pytest.sample WHERE guard = 0').nrow.values[0] == enabled0

    assert execute_and_inflate(client, 'SELECT COUNT(*) AS nrow FROM pytest.sample WHERE guard = 1').nrow.values[0] == total_rows - enabled0
    assert execute_and_inflate(client, 'SELECT COUNT(*) AS nrow FROM pytest.sample WHERE guard = -1').nrow.values[0] == 0

    tables = execute_and_inflate(client, 'show tables')
    assert len(tables) == 2
    assert tables.name.values[0] == 'sample'
    assert tables.name.values[1] == 'schema_versions'
    client.disconnect()

def test_ensure_parallel_dataset_fail_on_no_queue(client):
    client = get_connection('pytest', 'localhost', 'default', '')
    clean_slate(client)
    client.execute('CREATE TABLE sample(id UInt32, name UInt32) ENGINE MergeTree PARTITION BY tuple() ORDER BY tuple()')

    tables = execute_and_inflate(client, 'show tables')
    assert len(tables) == 2
    assert tables.name.values[0] == 'sample'
    assert tables.name.values[1] == 'schema_versions'

    os.system('gunzip < "tests/migrations_seq/test.csv.gz" | clickhouse-client --query="INSERT INTO pytest.sample FORMAT CSVWithNames"')
    total_rows = 100000
    assert execute_and_inflate(client, 'SELECT COUNT(*) AS nrow FROM pytest.sample').nrow.values[0] == total_rows
    enabled0 = execute_and_inflate(client, "SELECT COUNT(*) AS nrow FROM pytest.sample WHERE name > 3000").nrow.values[0]

    migrate('pytest', 'tests/migrations_seq', 'localhost', 'default', '', queue_exec=False)

    assert execute_and_inflate(client, 'SELECT COUNT(*) AS nrow FROM pytest.sample WHERE guard = 0').nrow.values[0] != enabled0
    client.disconnect()
--------------------------------------------------------------------------------
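
After a run, the migration bookkeeping can be inspected directly with the library's own helpers. A minimal sketch, assuming the same local test setup the suite above uses:

```python
from clickhouse_migrate.migrate import execute_and_inflate, get_connection

client = get_connection('pytest', 'localhost', 'default', '')
applied = execute_and_inflate(client, "SELECT version, script, md5, created_at FROM schema_versions ORDER BY version")
print(applied)  # one row per committed migration, in version order
client.disconnect()
```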