├── .github ├── dependabot.yml └── workflows │ ├── link_startrek.yml │ ├── test_clickhouse_version.yml │ └── workflow.yml ├── .gitignore ├── .python-version ├── .sourcery.yaml ├── AUTHORS ├── CONTRIBUTING.md ├── Dockerfile-deb-build ├── LICENSE ├── Makefile ├── README.md ├── build_deb.sh ├── build_deb_in_docker.sh ├── ch_tools ├── __init__.py ├── chadmin │ ├── README.md │ ├── __init__.py │ ├── chadmin_cli.py │ ├── cli │ │ ├── __init__.py │ │ ├── chadmin_group.py │ │ ├── chs3_backup_group.py │ │ ├── config_command.py │ │ ├── crash_log_group.py │ │ ├── data_store_group.py │ │ ├── database_group.py │ │ ├── database_metadata.py │ │ ├── diagnostics_command.py │ │ ├── dictionary_group.py │ │ ├── disk_group.py │ │ ├── flamegraph_group.py │ │ ├── list_async_metrics_command.py │ │ ├── list_events_command.py │ │ ├── list_functions_command.py │ │ ├── list_macros_command.py │ │ ├── list_metrics_command.py │ │ ├── list_settings_command.py │ │ ├── merge_group.py │ │ ├── metadata.py │ │ ├── move_group.py │ │ ├── mutation_group.py │ │ ├── object_storage_group.py │ │ ├── part_group.py │ │ ├── part_log_group.py │ │ ├── partition_group.py │ │ ├── process_group.py │ │ ├── query_log_group.py │ │ ├── replica_group.py │ │ ├── replicated_fetch_group.py │ │ ├── replication_queue_group.py │ │ ├── restore_replica_command.py │ │ ├── s3_credentials_config_group.py │ │ ├── stack_trace_command.py │ │ ├── table_group.py │ │ ├── thread_log_group.py │ │ ├── wait_group.py │ │ └── zookeeper_group.py │ └── internal │ │ ├── __init__.py │ │ ├── backup.py │ │ ├── clickhouse_disks.py │ │ ├── database_replica.py │ │ ├── diagnostics │ │ ├── __init__.py │ │ ├── data.py │ │ ├── diagnose.py │ │ ├── flamegraph.py │ │ ├── query.py │ │ └── utils.py │ │ ├── dictionary.py │ │ ├── migration.py │ │ ├── object_storage │ │ ├── __init__.py │ │ ├── obj_list_item.py │ │ ├── orphaned_objects_state.py │ │ ├── s3_cleanup.py │ │ ├── s3_iterator.py │ │ └── s3_object_metadata.py │ │ ├── part.py │ │ ├── partition.py │ │ ├── 
process.py │ │ ├── system.py │ │ ├── table.py │ │ ├── table_metadata.py │ │ ├── table_replica.py │ │ ├── utils.py │ │ ├── zookeeper.py │ │ └── zookeeper_clean.py ├── common │ ├── __init__.py │ ├── backup.py │ ├── cli │ │ ├── __init__.py │ │ ├── context_settings.py │ │ ├── formatting.py │ │ ├── locale_resolver.py │ │ ├── parameters.py │ │ ├── progress_bar.py │ │ └── utils.py │ ├── clickhouse │ │ ├── __init__.py │ │ ├── client │ │ │ ├── __init__.py │ │ │ ├── clickhouse_client.py │ │ │ ├── error.py │ │ │ ├── query.py │ │ │ ├── query_output_format.py │ │ │ ├── retry.py │ │ │ └── utils.py │ │ └── config │ │ │ ├── __init__.py │ │ │ ├── clickhouse.py │ │ │ ├── clickhouse_keeper.py │ │ │ ├── path.py │ │ │ ├── storage_configuration.py │ │ │ ├── users.py │ │ │ ├── utils.py │ │ │ └── zookeeper.py │ ├── commands │ │ ├── clean_object_storage.py │ │ └── replication_lag.py │ ├── config.py │ ├── dbaas.py │ ├── logging.py │ ├── process_pool.py │ ├── result.py │ ├── tls.py │ ├── type │ │ ├── __init__.py │ │ └── typed_enum.py │ ├── utils.py │ └── yaml.py ├── monrun_checks │ ├── README.md │ ├── __init__.py │ ├── ch_backup.py │ ├── ch_core_dumps.py │ ├── ch_dist_tables.py │ ├── ch_geobase.py │ ├── ch_keeper.py │ ├── ch_log_errors.py │ ├── ch_orphaned_objects.py │ ├── ch_ping.py │ ├── ch_replication_lag.py │ ├── ch_resetup_state.py │ ├── ch_ro_replica.py │ ├── ch_s3_backup_orphaned.py │ ├── ch_s3_credentials_config.py │ ├── ch_system_queues.py │ ├── ch_tls.py │ ├── clickhouse_info.py │ ├── dns.py │ ├── exceptions.py │ ├── main.py │ ├── status.py │ └── utils.py └── monrun_checks_keeper │ ├── README.md │ ├── __init__.py │ ├── keeper_commands.py │ ├── main.py │ └── status.py ├── debian ├── .gitignore ├── changelog ├── compat ├── control ├── copyright ├── rules └── source │ └── format ├── pyproject.toml ├── resources ├── completion │ ├── ch-monitoring-completion.bash │ ├── chadmin-completion.bash │ └── keeper-monitoring-completion.bash └── logrotate │ ├── chadmin.logrotate │ ├── 
clickhouse-monitoring.logrotate │ └── keeper-monitoring.logrotate ├── tests ├── configuration.py ├── env_control.py ├── environment.py ├── features │ ├── chadmin.feature │ ├── chadmin_partitions.feature │ ├── chadmin_perf_diag.feature │ ├── chadmin_zookeeper.feature │ ├── data_storage_group.feature │ ├── database_migrate.feature │ ├── monrun.feature │ ├── monrun_keeper.feature │ ├── object_storage.feature │ ├── s3_credentials.feature │ ├── table_change_uuid.feature │ └── table_delete.feature ├── images │ ├── clickhouse │ │ ├── Dockerfile │ │ └── config │ │ │ ├── ch-backup.conf │ │ │ ├── clickhouse-keyring.gpg │ │ │ ├── config.xml │ │ │ ├── dbaas.conf │ │ │ ├── monitor-ch-backup │ │ │ ├── regions_hierarchy.txt │ │ │ ├── regions_names_ru.txt │ │ │ ├── supervisor │ │ │ ├── conf.d │ │ │ │ ├── clickhouse-server.conf │ │ │ │ └── sshd.conf │ │ │ └── supervisord.conf │ │ │ └── users.xml │ ├── http_mock │ │ ├── Dockerfile │ │ └── service.py │ ├── minio │ │ ├── Dockerfile │ │ └── config │ │ │ └── mc.json │ └── zookeeper │ │ ├── Dockerfile │ │ └── config │ │ ├── log4j.properties │ │ ├── start_zk.sh │ │ ├── zoo.cfg │ │ └── zookeeper.conf ├── modules │ ├── __init__.py │ ├── chadmin.py │ ├── clickhouse.py │ ├── compose.py │ ├── docker.py │ ├── logs.py │ ├── minio.py │ ├── s3.py │ ├── steps.py │ ├── templates.py │ ├── typing.py │ └── utils.py ├── steps │ ├── __init__.py │ ├── chadmin.py │ ├── clickhouse.py │ ├── common.py │ ├── failure_mockers.py │ ├── s3.py │ └── zookeeper.py └── unit │ ├── __init__.py │ ├── chadmin │ └── test_validate_version.py │ ├── common │ ├── __init__.py │ ├── clickhouse │ │ ├── __init__.py │ │ ├── metadata │ │ │ ├── broken_no_engine.sql │ │ │ ├── broken_no_engine_full.sql │ │ │ ├── broken_no_uuid.sql │ │ │ ├── broken_no_uuid_full.sql │ │ │ ├── broken_uuid.sql │ │ │ ├── table_aggregating_merge_tree.sql │ │ │ ├── table_collapsing_merge_tree.sql │ │ │ ├── table_merge_tree.sql │ │ │ ├── table_merge_tree_field_engine.sql │ │ │ ├── 
table_merge_tree_field_uuid.sql │ │ │ ├── table_replacing_merge_tree.sql │ │ │ ├── table_replicated_aggregating_merge_tree.sql │ │ │ ├── table_replicated_collapsing_merge_tree.sql │ │ │ ├── table_replicated_merge_tree.sql │ │ │ ├── table_replicated_merge_tree_ver.sql │ │ │ ├── table_replicated_replacing_merge_tree.sql │ │ │ ├── table_replicated_summing_merge_tree.sql │ │ │ ├── table_replicated_versioned_collapsing_merge_tree.sql │ │ │ ├── table_summing_merge_tree.sql │ │ │ └── table_versioned_collapsing_merge_tree.sql │ │ ├── test_config.py │ │ ├── test_metadata_parser.py │ │ └── test_zk_path_escape.py │ ├── query │ │ ├── __init__.py │ │ └── test_query.py │ └── type │ │ ├── __init__.py │ │ └── test_typed_enum.py │ └── monrun │ ├── __init__.py │ └── test_backup.py └── uv.lock /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: pip 4 | directory: / 5 | schedule: 6 | interval: daily 7 | - package-ecosystem: github-actions 8 | directory: / 9 | schedule: 10 | interval: daily 11 | -------------------------------------------------------------------------------- /.github/workflows/link_startrek.yml: -------------------------------------------------------------------------------- 1 | name: Link with Startrek 2 | 3 | on: 4 | pull_request: { branches: [main] } 5 | 6 | env: 7 | ISSUE_PATTERN: '[A-Z]+-[0-9]+' 8 | 9 | jobs: 10 | link: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Parse issue ID 14 | run: | 15 | LAST_COMMIT_MESSAGE="$(curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" "${{ github.event.pull_request.commits_url }}" | jq -r .[-1].commit.message)" 16 | if [[ "$LAST_COMMIT_MESSAGE" =~ \[($ISSUE_PATTERN)\] || "${{ github.head_ref }}" =~ ^($ISSUE_PATTERN) ]]; then 17 | echo ISSUE_NUMBER="${BASH_REMATCH[1]}" >> "${GITHUB_ENV}" 18 | fi 19 | - name: Link issue 20 | if: env.ISSUE_NUMBER 21 | uses: fjogeleit/http-request-action@v1 22 | with: 23 | url: 
'https://st-api.yandex-team.ru/v2/issues/${{ env.ISSUE_NUMBER }}' 24 | method: 'LINK' 25 | customHeaders: > 26 | { 27 | "Link": "<${{ github.server_url }}/${{ github.repository }}/pull/${{ github.event.number }}>; rel=\"relates\"", 28 | "Authorization": "OAuth ${{ secrets.OAUTH_STARTREK_TOKEN }}" 29 | } 30 | ignoreStatusCodes: 409 31 | -------------------------------------------------------------------------------- /.github/workflows/test_clickhouse_version.yml: -------------------------------------------------------------------------------- 1 | name: test_clickhouse_version 2 | 3 | run-name: ${{ github.workflow }}_${{ inputs.clickhouse_version }}_${{ inputs.id || github.run_number }} 4 | 5 | on: 6 | workflow_dispatch: 7 | inputs: 8 | clickhouse_version: 9 | description: 'ClickHouse version' 10 | required: true 11 | type: string 12 | id: 13 | description: 'Run identifier' 14 | required: false 15 | type: string 16 | default: "" 17 | 18 | jobs: 19 | test_integration: 20 | runs-on: ubuntu-latest 21 | env: 22 | CLICKHOUSE_VERSION: ${{ inputs.clickhouse_version }} 23 | steps: 24 | - uses: actions/checkout@v4 25 | - uses: astral-sh/setup-uv@v6 26 | - name: run integration tests 27 | run: make test-integration 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .idea 3 | .install-deps 4 | .mypy_cache/ 5 | .pytype/ 6 | .ruff_cache/ 7 | .session_conf.sav 8 | __pycache__ 9 | build/ 10 | cython_debug/ 11 | dist/ 12 | out/ 13 | tests/reports/ 14 | tests/staging/ 15 | .venv/ 16 | ch_tools/version.txt 17 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.10 2 | -------------------------------------------------------------------------------- /.sourcery.yaml: 
-------------------------------------------------------------------------------- 1 | rule_settings: 2 | disable: 3 | - use-named-expression 4 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | The following authors have created the source code of "clickhouse-tools" 2 | published and distributed by YANDEX LLC as the owner: 3 | 4 | Alexander Burmak 5 | Dmitry Starov 6 | Anton Ivashkin 7 | Grigorii Pervakov 8 | Petr Nuzhnov 9 | Egor Medvedev 10 | Aleksei Filatov 11 | Evgeny Dyukov 12 | Evgeny Strizhnev 13 | Vadim Volodin 14 | Anton Chaporgin 15 | Evgenii Kopanev 16 | Mikhail Kot 17 | Mikhail Burdukov 18 | Kirill Garbar 19 | Konstantin Morozov 20 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Notice to external contributors 2 | 3 | ## General info 4 | 5 | Hello! In order for us (YANDEX LLC) to accept patches and other contributions from you, you will have to adopt our Yandex Contributor License Agreement (the "**CLA**"). The current version of the CLA can be found here: 6 | 1) https://yandex.ru/legal/cla/?lang=en (in English) and 7 | 2) https://yandex.ru/legal/cla/?lang=ru (in Russian). 8 | 9 | By adopting the CLA, you state the following: 10 | 11 | * You obviously wish and are willingly licensing your contributions to us for our open source projects under the terms of the CLA, 12 | * You have read the terms and conditions of the CLA and agree with them in full, 13 | * You are legally able to provide and license your contributions as stated, 14 | * We may use your contributions for our open source projects and for any other our project too, 15 | * We rely on your assurances concerning the rights of third parties in relation to your contributions. 
16 | 17 | If you agree with these principles, please read and adopt our CLA. By providing us your contributions, you hereby declare that you have already read and adopt our CLA, and we may freely merge your contributions with our corresponding open source project and use it in further in accordance with terms and conditions of the CLA. 18 | 19 | ## Provide contributions 20 | 21 | If you have already adopted terms and conditions of the CLA, you are able to provide your contributions. When you submit your pull request, please add the following information into it: 22 | 23 | ``` 24 | I hereby agree to the terms of the CLA available at: [link]. 25 | ``` 26 | 27 | Replace the bracketed text as follows: 28 | * [link] is the link to the current version of the CLA: https://yandex.ru/legal/cla/?lang=en (in English) or https://yandex.ru/legal/cla/?lang=ru (in Russian). 29 | 30 | It is enough to provide us such notification once. 31 | 32 | ## Other questions 33 | 34 | If you have any questions, please mail us at opensource@yandex-team.ru. 35 | -------------------------------------------------------------------------------- /Dockerfile-deb-build: -------------------------------------------------------------------------------- 1 | ARG BASE_IMAGE=ubuntu:22.04 2 | FROM --platform=$TARGETPLATFORM $BASE_IMAGE 3 | 4 | ARG DEBIAN_FRONTEND=noninteractive 5 | 6 | RUN set -ex \ 7 | && apt-get update \ 8 | && apt-get install -y --no-install-recommends \ 9 | # Debian packaging tools 10 | build-essential \ 11 | debhelper \ 12 | devscripts \ 13 | fakeroot \ 14 | # Managing keys for debian package signing 15 | gpg \ 16 | gpg-agent \ 17 | # Python packaging tools 18 | python3-dev \ 19 | python3-pip \ 20 | python3-setuptools \ 21 | python3-venv \ 22 | # Misc 23 | curl \ 24 | locales \ 25 | # Configure locales 26 | && locale-gen en_US.UTF-8 \ 27 | && update-locale LANG=en_US.UTF-8 \ 28 | # Ensure that `python` refers to `python3` so that poetry works. 
29 | # It makes sense for ubuntu:18.04 30 | && ln -s /usr/bin/python3 /usr/bin/python \ 31 | # Install `uv` 32 | && python3 -m pip install --upgrade pip \ 33 | && python3 -m pip install uv \ 34 | && ln -sf /usr/local/bin/uv /usr/bin/uv 35 | 36 | # Project directory must be mounted here 37 | VOLUME /src 38 | WORKDIR /src 39 | 40 | CMD ["make", "build-deb-package-local"] 41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | Copyright (c) 2023 YANDEX LLC 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![license](https://img.shields.io/github/license/yandex/ch-tools)](https://github.com/yandex/ch-tools/blob/main/LICENSE) 2 | [![tests status](https://img.shields.io/github/actions/workflow/status/yandex/ch-tools/.github%2Fworkflows%2Fworkflow.yml?event=push&label=tests&logo=github)](https://github.com/yandex/ch-tools/actions/workflows/workflow.yml?query=event%3Apush) 3 | [![chat](https://img.shields.io/badge/telegram-chat-blue)](https://t.me/+O4gURpLnQ604OTE6) 4 | 5 | # clickhouse-tools 6 | 7 | **clickhouse-tools** is a set of tools for administration and diagnostics of [ClickHouse](https://clickhouse.com/) DBMS. 8 | 9 | ## Tools 10 | 11 | **clickhouse-tools** consists of the following components: 12 | - [chadmin](./ch_tools/chadmin/README.md) - ClickHouse administration tool 13 | - [ch-monitoring](./ch_tools/monrun_checks/README.md) - ClickHouse monitoring tool 14 | - [keeper-monitoring](./ch_tools/monrun_checks_keeper/README.md) - ClickHouse Keeper / ZooKeeper monitoring tool 15 | 16 | All of these tools must be run on the host where the ClickHouse server is running. 
#!/usr/bin/env bash
#
# Build the debian package with debuild and move the produced artifacts to
# ${BUILD_DEB_OUTPUT_DIR}.
#
# Environment:
#   PROJECT_NAME          prefix of the artifact file names to collect
#   BUILD_DEB_OUTPUT_DIR  directory the artifacts are moved to (required)
#   DEB_SIGN_KEY          signing key material to import into gpg (optional)
#   DEB_SIGN_KEY_ID       id of a key that gpg already knows about (optional)
#   DEB_SIGN_KEY_PATH     path to a key file to import into gpg (optional)
#
# At most one of the DEB_SIGN_* variables may be set. When none is set the
# package is built unsigned.

set -e

# Function to display error message and exit
error_exit() {
    echo "Error: $1" >&2
    exit 1
}

# Print the fingerprint of the first key listed by gpg (empty if there is none)
first_key_fingerprint() {
    gpg --list-keys --with-colon | awk -F: '/^fpr/ {print $10;exit}'
}

# Fail before the (long) build instead of after it when there is nowhere to
# put the artifacts.
if [[ -z "${BUILD_DEB_OUTPUT_DIR}" ]]; then
    error_exit "BUILD_DEB_OUTPUT_DIR var must be defined"
fi

# Sanitize package signing options
COUNT=0
for sign_param in DEB_SIGN_KEY DEB_SIGN_KEY_ID DEB_SIGN_KEY_PATH; do
    if [[ -n "${!sign_param}" ]]; then
        COUNT=$((COUNT + 1))
    fi
done
if (( COUNT > 1 )); then
    error_exit "At most one of DEB_SIGN_KEY or DEB_SIGN_KEY_ID or DEB_SIGN_KEY_PATH vars must be defined"
fi

# Signing arguments are kept in an array so that every argument reaches
# debuild as a separate word without relying on word splitting.
SIGN_ARGS=()

# Import GPG signing private key if it is provided
if [[ -n "${DEB_SIGN_KEY_ID}" ]]; then
    # Check if gpg knows about this key id. The exit status is used rather
    # than the message text, which depends on the locale.
    if ! gpg --list-keys "${DEB_SIGN_KEY_ID}" > /dev/null 2>&1; then
        error_exit "No public key ${DEB_SIGN_KEY_ID}"
    fi
    SIGN_ARGS=("-k${DEB_SIGN_KEY_ID}")
elif [[ -n "${DEB_SIGN_KEY}" ]]; then
    echo "${DEB_SIGN_KEY}" | gpg --import || error_exit "Unable to import signing key from var DEB_SIGN_KEY"
    KEY_ID=$(first_key_fingerprint)
    if [[ -z "${KEY_ID}" ]]; then
        error_exit "Unable to import signing key from var DEB_SIGN_KEY"
    fi
    SIGN_ARGS=("-k${KEY_ID}")
elif [[ -n "${DEB_SIGN_KEY_PATH}" ]]; then
    gpg --import --with-colons "${DEB_SIGN_KEY_PATH}" || error_exit "Unable to import signing key from path: ${DEB_SIGN_KEY_PATH}"
    KEY_ID=$(first_key_fingerprint)
    if [[ -z "${KEY_ID}" ]]; then
        error_exit "Unable to import signing key from path: ${DEB_SIGN_KEY_PATH}"
    fi
    SIGN_ARGS=("-k${KEY_ID}")
else
    # Do not sign debian package
    SIGN_ARGS=(-us -uc)
fi

# Build package
(cd debian && debuild --preserve-env --check-dirname-level 0 "${SIGN_ARGS[@]}")

# Move debian package and signed metadata files to the output dir.
# A pattern that matches nothing stays literal, so a missing artifact still
# makes `mv` fail as it did before.
DEB_FILES=(../"${PROJECT_NAME}"*.{deb,dsc,changes,buildinfo,tar.*})
mkdir -p "${BUILD_DEB_OUTPUT_DIR}"
mv "${DEB_FILES[@]}" "${BUILD_DEB_OUTPUT_DIR}"
DEB_SIGN_KEY="${DEB_SIGN_KEY}" \ 25 | --env DEB_SIGN_KEY_ID="${DEB_SIGN_KEY_ID}" \ 26 | ) 27 | # Mount signing key file if its path is provided 28 | if [[ -n "${DEB_SIGN_KEY_PATH}" ]]; then 29 | RUN_ARGS+=( \ 30 | -v ${DEB_SIGN_KEY_PATH}:/signing_key \ 31 | --env DEB_SIGN_KEY_PATH=/signing_key \ 32 | ) 33 | fi 34 | 35 | docker build "${BUILD_ARGS[@]}" -t "${BUILD_IMAGE}" -f Dockerfile-deb-build . 36 | docker run "${RUN_ARGS[@]}" "${BUILD_IMAGE}" 37 | -------------------------------------------------------------------------------- /ch_tools/__init__.py: -------------------------------------------------------------------------------- 1 | """A set of tools for administration and diagnostics of ClickHouse DBMS.""" 2 | 3 | from importlib.resources import files 4 | 5 | __version__ = files(__name__).joinpath("version.txt").read_text().strip() 6 | -------------------------------------------------------------------------------- /ch_tools/chadmin/README.md: -------------------------------------------------------------------------------- 1 | # chadmin 2 | 3 | ClickHouse administration tool. 
class Chadmin(cloup.Group):
    """
    Command group that wraps the callbacks of its commands with logging.

    Every command added through `add_command` gets logging configured before
    it runs and has its start, completion and failure logged.
    """

    def add_command(
        self,
        cmd: click.Command,
        name: Optional[str] = None,
        section: Optional[cloup.Section] = None,
        fallback_to_default_section: bool = True,
    ) -> None:
        """
        Register `cmd` in the group, wrapping its callback with logging.

        A command without a callback is registered unchanged. The remaining
        arguments are passed through to `cloup.Group.add_command`.
        """
        cmd_callback = cmd.callback

        if cmd_callback is not None:

            @wraps(cmd_callback)
            @cloup.pass_context
            def wrapper(ctx, *a, **kw):
                logging.configure(
                    ctx.obj["config"]["loguru"],
                    "chadmin",
                    {"cmd_name": get_full_command_name(ctx)},
                )

                # NOTE(review): assumes the command is always invoked through
                # a parent group, so that ctx.parent is set - confirm.
                logging.debug(
                    "Command starts executing, params: {}, args: {}, version: {}",
                    {
                        **ctx.parent.params,
                        **ctx.params,
                    },
                    ctx.args,
                    __version__,
                )

                try:
                    # Keep the callback's result so that result callbacks and
                    # callers using standalone_mode=False can see it.
                    result = cmd_callback(*a, **kw)
                    logging.debug("Command completed")
                    return result
                except Exception:
                    # The error is reported through the logging module and is
                    # not re-raised here.
                    logging.exception("Command failed with error:", short_stdout=True)
                    return None

            cmd.callback = wrapper

        super().add_command(
            cmd,
            name=name,
            section=section,
            fallback_to_default_section=fallback_to_default_section,
        )

    def add_group(
        self,
        cmd: click.Group,
        name: Optional[str] = None,
        section: Optional[cloup.Section] = None,
        fallback_to_default_section: bool = True,
    ) -> None:
        """
        Register a nested group as is, without wrapping its callback.

        Calls the parent class `add_command` directly, so the logging wrapper
        installed by `Chadmin.add_command` is not applied to the group itself.
        """
        super().add_command(
            cmd,
            name=name,
            section=section,
            fallback_to_default_section=fallback_to_default_section,
        )
def delete_chs3_backups(ctx, chs3_backups, *, keep_going=False, dry_run=False):
    """
    Unfreeze each of the given CHS3 backups, one after another.

    When `keep_going` is set, an error for one backup is logged as a warning
    and the remaining backups are still processed; otherwise the first error
    is re-raised. `dry_run` is passed through to `unfreeze_backup`.
    """
    for backup_name in chs3_backups:
        try:
            unfreeze_backup(ctx, backup_name, dry_run=dry_run)
        except Exception as error:
            if not keep_going:
                raise
            logging.warning("{!r}\n", error)
12 | """ 13 | config = ClickhouseConfig.load() 14 | print_response(ctx, config.dump()) 15 | -------------------------------------------------------------------------------- /ch_tools/chadmin/cli/crash_log_group.py: -------------------------------------------------------------------------------- 1 | from click import group, option, pass_context 2 | 3 | from ch_tools.chadmin.cli.chadmin_group import Chadmin 4 | from ch_tools.chadmin.internal.utils import execute_query 5 | from ch_tools.common import logging 6 | from ch_tools.common.clickhouse.config import get_cluster_name 7 | 8 | 9 | @group("crash-log", cls=Chadmin) 10 | def crash_log_group(): 11 | """ 12 | Commands for retrieving information from system.crash_log. 13 | """ 14 | pass 15 | 16 | 17 | @crash_log_group.command("list") 18 | @option( 19 | "--cluster", 20 | "--on-cluster", 21 | "on_cluster", 22 | is_flag=True, 23 | help="Get log records from all hosts in the cluster.", 24 | ) 25 | @pass_context 26 | def list_crashes_command(ctx, on_cluster): 27 | cluster = get_cluster_name(ctx) if on_cluster else None 28 | query_str = """ 29 | SELECT 30 | {% if cluster %} 31 | hostName() "host", 32 | {% endif %} 33 | event_time, 34 | signal, 35 | thread_id, 36 | query_id, 37 | '\n' || arrayStringConcat(trace_full, '\n') AS trace, 38 | version 39 | {% if cluster %} 40 | FROM clusterAllReplicas({{ cluster }}, system.crash_log) 41 | {% else %} 42 | FROM system.crash_log 43 | {% endif %} 44 | ORDER BY event_time DESC 45 | """ 46 | logging.info(execute_query(ctx, query_str, cluster=cluster, format_="Vertical")) 47 | -------------------------------------------------------------------------------- /ch_tools/chadmin/cli/diagnostics_command.py: -------------------------------------------------------------------------------- 1 | import cloup 2 | from click import Context, pass_context 3 | 4 | from ch_tools.chadmin.internal.diagnostics.diagnose import diagnose 5 | from ch_tools.common.cli.parameters import env_var_help 6 | 7 | 8 | 
@cloup.command("diagnostics")
# Output format; the value may also be supplied through the
# CHADMIN_DIAGNOSTICS_FORMAT environment variable and is matched
# case-insensitively against the listed choices.
@cloup.option(
    "-o",
    "--format",
    "output_format",
    type=cloup.Choice(
        choices=["json", "yaml", "json.gz", "yaml.gz", "wiki", "wiki.gz"],
        case_sensitive=False,
    ),
    default="wiki",
    envvar="CHADMIN_DIAGNOSTICS_FORMAT",
    help="Output format for gathered diagnostics data. "
    + env_var_help("CHADMIN_DIAGNOSTICS_FORMAT"),
)
# Boolean flag; may also be supplied through the
# CHADMIN_DIAGNOSTICS_NORMALIZE_QUERIES environment variable.
@cloup.option(
    "-n",
    "--normalize-queries",
    is_flag=True,
    envvar="CHADMIN_DIAGNOSTICS_NORMALIZE_QUERIES",
    help="Whether to normalize queries for ClickHouse client. "
    + env_var_help("CHADMIN_DIAGNOSTICS_NORMALIZE_QUERIES"),
)
@pass_context
def diagnostics_command(
    ctx: Context, output_format: str, normalize_queries: bool
) -> None:
    """
    Collect diagnostics data.
    """
    # The command only binds CLI options; all work is delegated to diagnose().
    diagnose(ctx, output_format, normalize_queries)
22 | """ 23 | dictionaries = list_dictionaries(ctx, name=name, status=status) 24 | print_response( 25 | ctx, 26 | dictionaries, 27 | default_format="table", 28 | ) 29 | 30 | 31 | @dictionary_group.command("reload") 32 | @option("--name") 33 | @option("--status") 34 | @pass_context 35 | def reload_command(ctx, name, status): 36 | """ 37 | Reload one or several dictionaries. 38 | """ 39 | dictionaries = list_dictionaries(ctx, name=name, status=status) 40 | for dictionary in dictionaries: 41 | logging.info("Reloading dictionary {}", _full_name(dictionary)) 42 | reload_dictionary(ctx, database=dictionary["database"], name=dictionary["name"]) 43 | 44 | 45 | def _full_name(dictionary): 46 | db_name = dictionary["database"] 47 | dict_name = dictionary["name"] 48 | 49 | if db_name: 50 | return f"`{db_name}`.`{dict_name}`" 51 | 52 | return f"`{dict_name}`" 53 | -------------------------------------------------------------------------------- /ch_tools/chadmin/cli/disk_group.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import shutil 4 | 5 | from click import group, option 6 | 7 | from ch_tools.chadmin.cli.chadmin_group import Chadmin 8 | from ch_tools.common import logging 9 | 10 | 11 | @group("disks", cls=Chadmin) 12 | def disks_group(): 13 | """Commands to manage disks.""" 14 | pass 15 | 16 | 17 | @disks_group.command("check-s3-metadata") 18 | @option( 19 | "--path", 20 | "path", 21 | default="/var/lib/clickhouse/disks/object_storage/store", 22 | help="Path to S3 metadata.", 23 | ) 24 | @option("--cleanup", is_flag=True, help="Remove parts with corrupted S3 metadata.") 25 | def check_s3_metadata_command(path, cleanup): 26 | check_dir(path, cleanup) 27 | 28 | 29 | def check_dir(path, cleanup): 30 | corrupted_dirs = [] 31 | for dirpath, _, filenames in os.walk(path): 32 | for filename in filenames: 33 | if not check_file(f"{dirpath}/{filename}"): 34 | logging.info("{}/{}", dirpath, filename) 35 | if 
def check_file(filename):
    """
    Check that a file looks like a well-formed S3 metadata file.

    The file must consist of exactly five lines:

    1. format version (1, 2 or 3);
    2. object count, which must be 1, followed by a size;
    3. a size followed by the object name;
    4. reference count;
    5. read-only flag (0 or 1), with or without a trailing newline.

    Returns True when every line matches its expected layout, False otherwise.
    """
    # Every line is matched in full. A prefix match would accept trailing
    # garbage on the last line (e.g. "0xyz"), because only that pattern does
    # not end with a mandatory newline.
    line_patterns = (
        r"[123]\n",  # version 1-3
        r"1\s+\d+\n",  # object count=1 & size
        r"\d+\s+\S+\n",  # size & object name
        r"\d+\n",  # refcount
        r"[01]\n?",  # is readonly
    )

    with open(filename, mode="r", encoding="latin-1") as file:
        # The size hint stops reading once roughly 1 KiB of lines is collected.
        lines = file.readlines(1024)

    if len(lines) != len(line_patterns):
        return False

    return all(
        re.fullmatch(pattern, line) is not None
        for pattern, line in zip(line_patterns, lines)
    )
@command("functions")
@option("--name")
@pass_context
def list_functions_command(ctx, name):
    """
    Show available functions.
    """
    # Templated query: the WHERE clause is emitted only when --name is given.
    # NOTE(review): format_str_imatch is not defined in this file; presumably
    # it is a helper that execute_query exposes to the template - confirm.
    query = """
        SELECT *
        FROM system.functions
        {% if name %}
        WHERE lower(name) {{ format_str_imatch(name) }}
        {% endif %}
        """
    logging.info(execute_query(ctx, query, name=name))
@command("settings")
@option("--name")
@option("--changed", is_flag=True)
@pass_context
def list_settings_command(ctx, name, changed):
    """
    Show settings.
    """
    # "WHERE 1" is an always-true base condition, so each optional filter
    # below can be appended uniformly with AND.
    # NOTE(review): format_str_imatch is not defined in this file; presumably
    # it is a helper that execute_query exposes to the template - confirm.
    query = """
        SELECT *
        FROM system.settings
        WHERE 1
        {% if name %}
          AND lower(name) {{ format_str_imatch(name) }}
        {% endif %}
        {% if changed %}
          AND changed
        {% endif %}
        """
    logging.info(execute_query(ctx, query, name=name, changed=changed))
@merge_group.command("list")
@option(
    "-d", "--database", help="Filter in merges to output by the specified database."
)
@option("-t", "--table", help="Filter in merges to output by the specified table.")
@option("--mutation", "is_mutation", is_flag=True)
@option(
    "--cluster",
    "--on-cluster",
    "on_cluster",
    is_flag=True,
    help="Get merges from all hosts in the cluster.",
)
@option(
    "-l",
    "--limit",
    type=int,
    default=1000,
    help="Limit the max number of objects in the output.",
)
@pass_context
def list_command(ctx, on_cluster, limit, **kwargs):
    """List executing merges."""

    def _table_formatter(merge):
        # Mutations are labelled as such; real merges show type and algorithm.
        merge_type = (
            "mutation"
            if merge["is_mutation"]
            else f"{merge['merge_type']} {merge['merge_algorithm']} merge"
        )

        # Column order of the table output follows insertion order.
        row = OrderedDict()
        row["database"] = merge["database"]
        row["table"] = merge["table"]
        row["result_part"] = merge["result_part_name"]
        row["source_parts"] = "\n".join(merge["source_part_names"])
        row["type"] = merge_type
        row["elapsed"] = merge["elapsed"]
        row["progress"] = merge["progress"]
        row["total_size"] = merge["total_size_bytes_compressed"]
        row["memory_usage"] = merge["memory_usage"]
        return row

    # The cluster name is resolved only when the cluster-wide listing is requested.
    cluster = get_cluster_name(ctx) if on_cluster else None

    print_response(
        ctx,
        list_merges(ctx, cluster=cluster, limit=limit, **kwargs),
        default_format="table",
        table_formatter=_table_formatter,
        field_formatters=FIELD_FORMATTERS,
    )
@move_group.command("list")
@option(
    "-d",
    "--database",
    help="Filter in moves to output by the specified database.",
)
@option(
    "-t",
    "--table",
    help="Filter in moves to output by the specified table.",
)
@option(
    "--cluster",
    "--on-cluster",
    "on_cluster",
    is_flag=True,
    help="Get moves from all hosts in the cluster.",
)
@option(
    "-l",
    "--limit",
    type=int,
    default=1000,
    help="Limit the max number of objects in the output.",
)
@pass_context
def list_command(ctx, on_cluster, limit, **kwargs):
    """List executing moves."""

    def _table_formatter(item):
        # Select and rename the fields shown in the table output; the column
        # order follows the order of the pairs below.
        return OrderedDict(
            (
                ("database", item["database"]),
                ("table", item["table"]),
                ("elapsed", item["elapsed"]),
                ("target_disk", item["target_disk_name"]),
                ("target_path", item["target_disk_path"]),
                ("part_name", item["part_name"]),
                ("part_size", item["part_size"]),
            )
        )

    # The cluster name is resolved only when the cluster-wide listing is requested.
    cluster = get_cluster_name(ctx) if on_cluster else None

    # Remaining options (database, table) are forwarded to list_moves via kwargs.
    print_response(
        ctx,
        list_moves(ctx, cluster=cluster, limit=limit, **kwargs),
        default_format="table",
        table_formatter=_table_formatter,
        field_formatters=FIELD_FORMATTERS,
    )
@process_group.command("list")
@option("-u", "--user")
@option("-U", "--exclude-user")
@option("--query")
@option("-v", "--verbose", is_flag=True, help="Verbose mode.")
@option(
    "--cluster",
    "--on-cluster",
    "on_cluster",
    is_flag=True,
    help="Get records from all hosts in the cluster.",
)
@option(
    "--order-by",
    type=Choice(["elapsed", "memory_usage"]),
    default="elapsed",
    help="Sorting order.",
)
@option(
    "-l", "--limit", type=int, help="Limit the max number of objects in the output."
)
@pass_context
def list_processes_command(
    ctx, user, exclude_user, query, verbose, on_cluster, order_by, limit
):
    """
    List processes.
    """
    # The cluster name is resolved only when the cluster-wide listing is requested.
    cluster = get_cluster_name(ctx) if on_cluster else None

    # The --query value is passed on as a pattern (query_pattern), not as a query id.
    processes = list_processes(
        ctx,
        user=user,
        exclude_user=exclude_user,
        query_pattern=query,
        cluster=cluster,
        limit=limit,
        order_by=order_by,
        verbose=verbose,
    )

    # The "query" field is rendered through format_query (see FIELD_FORMATTERS).
    print_response(
        ctx, processes, default_format="yaml", field_formatters=FIELD_FORMATTERS
    )
@replicated_fetch_group.command("list")
@option(
    "-d", "--database", help="Filter in fetches to output by the specified database."
)
@option("-t", "--table", help="Filter in fetches to output by the specified table.")
@option(
    "--cluster",
    "--on-cluster",
    "on_cluster",
    is_flag=True,
    help="Get fetches from all hosts in the cluster.",
)
@option(
    "-l",
    "--limit",
    type=int,
    default=1000,
    help="Limit the max number of objects in the output.",
)
@pass_context
def list_command(ctx, on_cluster, limit, **kwargs):
    """List executing fetches."""

    def _table_formatter(fetch):
        # Select and rename the fields shown in the table output; the column
        # order follows the order of the pairs below.
        return OrderedDict(
            (
                ("database", fetch["database"]),
                ("table", fetch["table"]),
                ("result_part", fetch["result_part_name"]),
                ("elapsed", fetch["elapsed"]),
                ("progress", fetch["progress"]),
                ("source_replica", fetch["source_replica_hostname"]),
                ("total_size", fetch["total_size_bytes_compressed"]),
            )
        )

    # The cluster name is resolved only when the cluster-wide listing is requested.
    cluster = get_cluster_name(ctx) if on_cluster else None

    # Remaining options (database, table) are forwarded via kwargs.
    fetches = list_replicated_fetches(ctx, cluster=cluster, limit=limit, **kwargs)

    print_response(
        ctx,
        fetches,
        default_format="table",
        table_formatter=_table_formatter,
        field_formatters=FIELD_FORMATTERS,
    )
@s3_credentials_config_group.command("update")
@option(
    "-e",
    "--endpoint",
    "s3_endpoint",
    type=str,
    required=True,
    help="S3 endpoint.",
)
@option(
    "-s",
    "--random-sleep",
    "random_sleep",
    default=False,
    help="Perform random sleep before updating S3 credentials config.",
)
@pass_context
def update_s3_credentials(ctx, s3_endpoint, random_sleep):
    """Update S3 credentials config."""
    if random_sleep:
        # Delay for a random whole number of seconds in [0, 30].
        time.sleep(random.randint(0, 30))

    # Build <clickhouse><s3><cloud_storage>...</cloud_storage></s3></clickhouse>.
    doc = minidom.Document()
    root_node = _add_xml_node(doc, doc, "clickhouse")
    s3_node = _add_xml_node(doc, root_node, "s3")
    storage_node = _add_xml_node(doc, s3_node, "cloud_storage")

    # The element that carries the token header is named differently
    # starting from ClickHouse 24.11.
    if match_ch_version(ctx, min_version="24.11"):
        header_tag = "access_header"
    else:
        header_tag = "header"

    endpoint_node = _add_xml_node(doc, storage_node, "endpoint")
    endpoint_node.appendChild(doc.createTextNode(s3_endpoint))

    header_node = _add_xml_node(doc, storage_node, header_tag)
    header_node.appendChild(
        doc.createTextNode(f"X-YaCloud-SubjectToken: {_get_token(ctx)}")
    )

    # toprettyxml() returns bytes when an encoding is given, hence binary mode.
    with open(CLICKHOUSE_S3_CREDENTIALS_CONFIG_PATH, "wb") as file:
        file.write(doc.toprettyxml(indent=" " * 4, encoding="utf-8"))
61 | 62 | def _add_xml_node(document, root, name): 63 | node = document.createElement(name) 64 | root.appendChild(node) 65 | return node 66 | 67 | 68 | def _get_token(ctx): 69 | response = _request_token(ctx) 70 | if response.status_code != 200: 71 | sys.exit(1) 72 | data = json.loads(response.content) 73 | if data["token_type"] != "Bearer": 74 | sys.exit(1) 75 | return data["access_token"] 76 | 77 | 78 | def _request_token(ctx): 79 | endpoint = ctx.obj["config"]["cloud"]["metadata_service_endpoint"] 80 | return requests.get( 81 | f"{endpoint}/computeMetadata/v1/instance/service-accounts/default/token", 82 | headers={"Metadata-Flavor": "Google"}, 83 | timeout=60, 84 | ) 85 | -------------------------------------------------------------------------------- /ch_tools/chadmin/cli/stack_trace_command.py: -------------------------------------------------------------------------------- 1 | from click import command, pass_context 2 | 3 | from ch_tools.chadmin.internal.utils import execute_query 4 | from ch_tools.common import logging 5 | 6 | 7 | @command("stack-trace") 8 | @pass_context 9 | def stack_trace_command(ctx): 10 | """ 11 | Collect stack traces. 
def unfreeze_table(ctx, database, table, backup_name, dry_run=False):
    """
    Perform "ALTER TABLE UNFREEZE".

    The statement targets `database`.`table` and the backup called
    `backup_name`. The timeout is read from the "unfreeze_timeout" setting of
    the ClickHouse section of the config, and `dry_run` is forwarded to
    execute_query.
    """
    unfreeze_timeout = ctx.obj["config"]["clickhouse"]["unfreeze_timeout"]
    statement = (
        f"ALTER TABLE `{database}`.`{table}` UNFREEZE WITH NAME '{backup_name}'"
    )
    # format_=None: no output format is requested for this statement.
    execute_query(
        ctx,
        statement,
        timeout=unfreeze_timeout,
        echo=True,
        format_=None,
        dry_run=dry_run,
    )
def make_ch_disks_config(disk: str) -> str:
    """
    Write an XML config that contains only the given disk and return its path.

    The disk section is taken from the loaded ClickHouse config and written to
    /tmp/chadmin-ch-disks-<disk>.xml under clickhouse/storage_configuration/disks.
    """
    storage_configuration = ClickhouseConfig.load().storage_configuration
    disk_config = storage_configuration.get_disk_config(disk)

    disk_config_path = f"/tmp/chadmin-ch-disks-{disk}.xml"
    logging.info("Create a conf for {} disk: {}", disk, disk_config_path)

    # Only the requested disk is kept in the generated config.
    document = {
        "clickhouse": {
            "storage_configuration": {"disks": {disk: disk_config}},
        }
    }
    with open(disk_config_path, "w", encoding="utf-8") as f:
        xmltodict.unparse(document, f, pretty=True)

    return disk_config_path
def delayed(f):
    """
    Decorator that defers the call of `f`.

    Calling the decorated function does not run `f`; it returns a
    functools.partial with the given arguments bound. Calling that partial
    performs the actual call.
    """

    def deferred(*args, **kwargs):
        return partial(f, *args, **kwargs)

    # Copy the name, docstring and other metadata of `f` onto the wrapper.
    return wraps(f)(deferred)
def reload_dictionary(ctx, *, name, database=None):
    """
    Reload external dictionary.

    The dictionary name is backtick-quoted and qualified with `database` when
    one is given.
    """
    full_name = f"`{database}`.`{name}`" if database else f"`{name}`"

    # format_=None: no output format is requested for this statement.
    execute_query(ctx, f"""SYSTEM RELOAD DICTIONARY {full_name}""", format_=None)
@dataclass
class OrphanedObjectsState:
    """
    State record with a size of orphaned objects and an error message.

    It can be serialized to and restored from a JSON object with the keys
    "orphaned_objects_size" and "error_msg".
    """

    orphaned_objects_size: int
    error_msg: str

    @classmethod
    def from_json(cls, json_str: str) -> "OrphanedObjectsState":
        """Build a state from a JSON object string; both keys are required."""
        payload = json.loads(json_str)
        size = payload["orphaned_objects_size"]
        message = payload["error_msg"]
        return cls(orphaned_objects_size=size, error_msg=message)

    def to_json(self) -> str:
        """Serialize the state to a JSON string indented by 4 spaces."""
        fields = asdict(self)
        return json.dumps(fields, indent=4)
def s3_object_storage_iterator(
    disk: S3DiskConfiguration,
    *,
    object_name_prefix: str = "",
    skip_ignoring: bool = False,
) -> Iterator[ObjectSummary]:
    """
    Lazily yield the objects of the disk's bucket whose key starts with
    `object_name_prefix`.

    Objects whose key is rejected by `_is_ignored` are skipped unless
    `skip_ignoring` is set.
    """
    client_config = Config(s3={"addressing_style": "auto"})
    resource = boto3.resource(
        "s3",
        endpoint_url=disk.endpoint_url,
        aws_access_key_id=disk.access_key_id,
        aws_secret_access_key=disk.secret_access_key,
        config=client_config,
    )
    listing = resource.Bucket(disk.bucket_name).objects.filter(
        Prefix=object_name_prefix
    )

    for summary in listing:
        if skip_ignoring or not _is_ignored(summary.key):
            yield summary
MAX_METADATA_FILE_SIZE = 10 * 1024


@dataclass
class S3ObjectLocalInfo:
    """
    Information about the S3 object stored locally in the metadata file.
    """

    key: str
    size: int


@dataclass
class S3ObjectLocalMetaData:
    """
    Parsed content of metadata file stored on the local disk.
    """

    version: int
    total_size: int
    objects: List[S3ObjectLocalInfo]
    ref_counter: int
    read_only: bool

    @classmethod
    def from_string(cls, value: str) -> "S3ObjectLocalMetaData":
        """
        Parse the text of a metadata file.

        Expected layout, one item per line:
          1. format version: 1, 2 or 3
          2. object count and total size, whitespace separated
          3. one "<size> <key>" line per object
          4. reference counter
          5. read-only flag: 0 or 1

        Raises ValueError if a line is malformed or the text ends before all
        expected lines were read.
        """
        lines = value.splitlines()

        def parse_line(idx, pattern, error):
            # A truncated file used to surface as a bare IndexError; report it
            # with the same exception type as any other malformed input.
            if idx >= len(lines):
                raise ValueError(
                    f"{error}. Unexpected end of metadata after {len(lines)} line(s)"
                )
            matches = re.match(pattern, lines[idx])
            if not matches:
                raise ValueError(f"{error}. Line: `{lines[idx]}`")
            return matches

        version = int(parse_line(0, r"^[123]$", "Incorrect metadata version")[0])

        header = parse_line(
            1,
            r"^(\d+)\s+(\d+)$",
            "Incorrect metadata about the objects count and total size",
        )
        object_count, total_size = int(header[1]), int(header[2])

        # Object lines occupy indexes 2 .. 2 + object_count - 1.
        first_object_idx = 2
        objects: List[S3ObjectLocalInfo] = []
        for idx in range(first_object_idx, first_object_idx + object_count):
            matches = parse_line(
                idx,
                r"^(\d+)\s+(\S+)$",
                "Incorrect metadata about object size and name",
            )
            objects.append(S3ObjectLocalInfo(key=matches[2], size=int(matches[1])))

        tail_idx = first_object_idx + object_count
        refcounter = int(
            parse_line(tail_idx, r"^\d+$", "Incorrect metadata about refcounter")[0]
        )
        read_only = bool(
            int(
                parse_line(
                    tail_idx + 1, "^[01]$", "Incorrect metadata about readonly flag"
                )[0]
            )
        )

        return cls(
            version=version,
            total_size=total_size,
            objects=objects,
            ref_counter=refcounter,
            read_only=read_only,
        )

    @classmethod
    def from_file(cls, path: Path) -> "S3ObjectLocalMetaData":
        """
        Read and parse a metadata file.

        Raises ValueError if the file is larger than MAX_METADATA_FILE_SIZE
        bytes or its content is not valid metadata.
        """
        if path.stat().st_size > MAX_METADATA_FILE_SIZE:
            raise ValueError(
                f"Metadata file too large. Its size must not exceed {MAX_METADATA_FILE_SIZE} bytes"
            )
        # latin-1 maps every byte to a character, so decoding itself never fails.
        with path.open(encoding="latin-1") as file:
            return cls.from_string(file.read())
def validate_version(version: str) -> None:
    """
    Check that `version` starts with four dot-separated numeric components.

    Any text may follow the fourth component.

    Raises AssertionError if the format does not match.
    """
    pattern = r"^\d+\.\d+\.\d+\.\d+(.+)?$"

    # Raised explicitly rather than through an `assert` statement: asserts are
    # compiled out under `python -O`, which would silently disable the check.
    if not re.match(pattern, version):
        raise AssertionError(f"version={version} has broken format")
def format_query(query):
    """
    Format SQL query for output.

    Drops empty and whitespace-only lines, including leading ones, while
    keeping the indentation of the remaining lines.
    """
    # The flag must be passed by keyword: the fourth positional parameter of
    # re.sub is `count`, so a positional re.MULTILINE (== 8) capped the number
    # of replacements at 8 instead of setting a flag.
    return re.sub(r"(\A|\n)\s*\n", r"\1", query, flags=re.MULTILINE)
def replace_macros(string: str, macros: dict) -> str:
    """
    Expand "{macro_name}" placeholders in the string using the macros mapping.

    A placeholder whose name is not a key of `macros` is left untouched.

    Example:
    >>> replace_macros('{a} and {b}', {'a': '1', 'b': '2'})
    '1 and 2'
    """

    def _expand(match):
        placeholder, macro_name = match.group(0, 1)
        return macros.get(macro_name, placeholder)

    return re.sub(r"{([^{}]+)}", _expand, string)
def run(command, data=None):
    """
    Execute a shell command and return its decoded standard output.

    When `data` is truthy it is encoded and written to the command's stdin.

    Raises RuntimeError carrying the exit code, and the stripped stderr text
    if there is any, when the command exits with a non-zero code.
    """
    with subprocess.Popen(
        command,
        shell=True,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    ) as process:
        stdin_bytes = data.encode() if data else None
        out, err = process.communicate(input=stdin_bytes)
        exit_code = process.returncode

    if not exit_code:
        return out.decode()

    message_parts = [f'Command "{command}" failed with code {exit_code}']
    if err:
        message_parts.append(err.decode().strip())
    raise RuntimeError("\n".join(message_parts))
Otherwise, it may fail with an error like 14 | 15 | ``` 16 | RuntimeError: Click discovered that you exported a UTF-8 locale 17 | but the locale system could not pick up from it because it does not exist. 18 | The exported locale is 'en_US.UTF-8' but it is not supported. 19 | ``` 20 | """ 21 | 22 | @staticmethod 23 | def resolve(): 24 | lang, _ = locale.getlocale() 25 | locales, has_c, has_en_us = LocaleResolver._get_utf8_locales() 26 | 27 | langs = map(lambda loc: str.lower(loc[0]), locales) 28 | if lang is None or lang.lower() not in langs: 29 | if has_c: 30 | lang = "C" 31 | elif has_en_us: 32 | lang = "en_US" 33 | else: 34 | raise RuntimeError( 35 | f'Locale "{lang}" is not supported. ' 36 | 'We tried to use "C" and "en_US" but they\'re absent on your machine.', 37 | ) 38 | 39 | for locale_ in locales: 40 | if lang != locale_[0]: 41 | continue 42 | 43 | os.environ["LC_ALL"] = f"{lang}.{locale_[1]}" 44 | os.environ["LANG"] = f"{lang}.{locale_[1]}" 45 | 46 | @staticmethod 47 | def _get_utf8_locales() -> Tuple[List[Tuple[str, str]], bool, bool]: 48 | try: 49 | with subprocess.Popen( 50 | ["locale", "-a"], 51 | stdout=subprocess.PIPE, 52 | stderr=subprocess.PIPE, 53 | encoding="ascii", 54 | errors="replace", 55 | ) as proc: 56 | stdout, _ = proc.communicate() 57 | except OSError: 58 | stdout = "" 59 | 60 | langs = [] 61 | encodings = [] 62 | 63 | has_c = False 64 | has_en_us = False 65 | 66 | for line in stdout.splitlines(): 67 | locale_ = line.strip() 68 | if not locale_.lower().endswith(("utf-8", "utf8")): 69 | continue 70 | 71 | lang, encoding = locale_.split(".") 72 | 73 | langs.append(lang) 74 | encodings.append(encoding) 75 | 76 | has_c |= lang.lower() == "c" 77 | has_en_us |= lang.lower() == "en_us" 78 | 79 | res = list(zip(langs, encodings)) 80 | 81 | return res, has_c, has_en_us 82 | -------------------------------------------------------------------------------- /ch_tools/common/cli/progress_bar.py: 
def get_timezone(ctx):
    """
    Return the timezone object for the context, resolving it on first use.

    The zone name is read from the "timezone" key of the config ("UTC" when
    absent), resolved with `gettz`, and cached under `ctx.obj["timezone"]`.
    """
    state = ctx.obj
    if "timezone" in state:
        return state["timezone"]

    zone_name = state["config"].get("timezone", "UTC")
    state["timezone"] = gettz(zone_name)
    return state["timezone"]
def flatten_nullable(value):
    """
    Split a possibly wrapped value into a `(specified, value)` pair.

    None means "not specified" and yields `(False, None)`. A Nullable is
    unwrapped to its inner value; anything else is passed through unchanged.
    """
    if value is None:
        return False, None

    unwrapped = value.value if isinstance(value, Nullable) else value
    return True, unwrapped
class Query:
    """
    Query text with optional sensitive arguments.

    Sensitive arguments are substituted into the text with `str.format`.
    `str()` and `repr()` show them masked; `for_execute()` substitutes the
    real values. Equality and hashing are based on the masked `repr()`.
    """

    mask = "*****"

    def __init__(
        self,
        value: str,
        sensitive_args: Optional[Dict[str, str]] = None,
    ):
        self.value = value
        self.sensitive_args = sensitive_args if sensitive_args else {}

    def for_execute(self) -> str:
        """Return the query text with the real sensitive values substituted."""
        return self._render(mask_sensitive=False)

    def _render(self, mask_sensitive: bool = True) -> str:
        real_args = self.sensitive_args
        if not real_args:
            # Nothing to substitute: the text is returned verbatim, so braces
            # in it are not interpreted as format fields.
            return self.value
        if mask_sensitive:
            return self.value.format(**self._sensitive_args_mask())
        return self.value.format(**real_args)

    def _sensitive_args_mask(self) -> Dict[str, str]:
        return dict.fromkeys(self.sensitive_args, self.mask)

    def __str__(self) -> str:
        return self._render(mask_sensitive=True)

    def __repr__(self) -> str:
        class_name = type(self).__name__
        masked_args = self._sensitive_args_mask()
        return f"{class_name}(value='{self!s}', sensitive_args={masked_args})"

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, type(self)):
            return False
        return repr(self) == repr(other)

    def __hash__(self) -> int:
        return hash(repr(self))

    def __add__(self, other: str) -> "Query":
        extended_text = self.value + other
        return Query(extended_text, self.sensitive_args)
class OutputFormat(StrEnum):
    """
    Names of query output formats.

    Every member's value is the format name as a string; apart from
    `Default`, the member name and its value are identical.
    """

    # Same literal as the PrettyCompact member further down.
    # NOTE(review): if StrEnum follows the standard Enum rules, PrettyCompact
    # is then an alias of Default rather than a distinct member - confirm.
    Default = "PrettyCompact"
    TabSeparated = "TabSeparated"
    TabSeparatedRaw = "TabSeparatedRaw"
    TabSeparatedWithNames = "TabSeparatedWithNames"
    TabSeparatedWithNamesAndTypes = "TabSeparatedWithNamesAndTypes"
    TabSeparatedRawWithNames = "TabSeparatedRawWithNames"
    TabSeparatedRawWithNamesAndTypes = "TabSeparatedRawWithNamesAndTypes"
    CSV = "CSV"
    CSVWithNames = "CSVWithNames"
    CSVWithNamesAndTypes = "CSVWithNamesAndTypes"
    SQLInsert = "SQLInsert"
    Values = "Values"
    Vertical = "Vertical"
    JSON = "JSON"
    JSONStrings = "JSONStrings"
    JSONColumns = "JSONColumns"
    JSONColumnsWithMetadata = "JSONColumnsWithMetadata"
    JSONCompact = "JSONCompact"
    JSONCompactStrings = "JSONCompactStrings"
    JSONCompactColumns = "JSONCompactColumns"
    JSONEachRow = "JSONEachRow"
    PrettyJSONEachRow = "PrettyJSONEachRow"
    JSONEachRowWithProgress = "JSONEachRowWithProgress"
    JSONStringsEachRow = "JSONStringsEachRow"
    JSONStringsEachRowWithProgress = "JSONStringsEachRowWithProgress"
    JSONCompactEachRow = "JSONCompactEachRow"
    JSONCompactEachRowWithNames = "JSONCompactEachRowWithNames"
    JSONCompactEachRowWithNamesAndTypes = "JSONCompactEachRowWithNamesAndTypes"
    JSONCompactStringsEachRow = "JSONCompactStringsEachRow"
    JSONCompactStringsEachRowWithNames = "JSONCompactStringsEachRowWithNames"
    JSONCompactStringsEachRowWithNamesAndTypes = (
        "JSONCompactStringsEachRowWithNamesAndTypes"
    )
    JSONObjectEachRow = "JSONObjectEachRow"
    BSONEachRow = "BSONEachRow"
    TSKV = "TSKV"
    Pretty = "Pretty"
    PrettyNoEscapes = "PrettyNoEscapes"
    PrettyMonoBlock = "PrettyMonoBlock"
    PrettyNoEscapesMonoBlock = "PrettyNoEscapesMonoBlock"
    PrettyCompact = "PrettyCompact"
    PrettyCompactNoEscapes = "PrettyCompactNoEscapes"
    PrettyCompactMonoBlock = "PrettyCompactMonoBlock"
    PrettyCompactNoEscapesMonoBlock = "PrettyCompactNoEscapesMonoBlock"
    PrettySpace = "PrettySpace"
    PrettySpaceNoEscapes = "PrettySpaceNoEscapes"
    PrettySpaceMonoBlock = "PrettySpaceMonoBlock"
    PrettySpaceNoEscapesMonoBlock = "PrettySpaceNoEscapesMonoBlock"
    Prometheus = "Prometheus"
    Protobuf = "Protobuf"
    ProtobufSingle = "ProtobufSingle"
    Avro = "Avro"
    Parquet = "Parquet"
    Arrow = "Arrow"
    ORC = "ORC"
    RowBinary = "RowBinary"
    RowBinaryWithNames = "RowBinaryWithNames"
    RowBinaryWithNamesAndTypes = "RowBinaryWithNamesAndTypes"
    Native = "Native"
    Null = "Null"
    XML = "XML"
    CapnProto = "CapnProto"
    LineAsString = "LineAsString"
    RawBLOB = "RawBLOB"
    MsgPack = "MsgPack"
    Markdown = "Markdown"
13 | """ 14 | return tenacity.retry( 15 | retry=tenacity.retry_if_exception_type(exception_types), 16 | wait=tenacity.wait_random_exponential(multiplier=0.5, max=max_interval), 17 | stop=tenacity.stop_after_attempt(max_attempts), 18 | reraise=True, 19 | ) 20 | -------------------------------------------------------------------------------- /ch_tools/common/clickhouse/client/utils.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | 4 | def _format_str_match(value: Optional[str]) -> Optional[str]: 5 | # pylint: disable=consider-using-f-string 6 | 7 | if value is None: 8 | return None 9 | 10 | if value.find(",") < 0: 11 | return f"LIKE '{value}'" 12 | 13 | return "IN ({0})".format(",".join(f"'{item.strip()}'" for item in value.split(","))) 14 | 15 | 16 | def _format_str_imatch(value: Optional[str]) -> Optional[str]: 17 | if value is None: 18 | return None 19 | 20 | return _format_str_match(value.lower()) 21 | -------------------------------------------------------------------------------- /ch_tools/common/clickhouse/config/__init__.py: -------------------------------------------------------------------------------- 1 | from click import Context 2 | 3 | from .clickhouse import ClickhouseConfig 4 | from .clickhouse_keeper import ClickhouseKeeperConfig 5 | from .users import ClickhouseUsersConfig 6 | from .zookeeper import ClickhouseZookeeperConfig 7 | 8 | __all__ = [ 9 | "ClickhouseConfig", 10 | "ClickhouseKeeperConfig", 11 | "ClickhouseUsersConfig", 12 | "ClickhouseZookeeperConfig", 13 | ] 14 | 15 | 16 | def get_clickhouse_config(ctx: Context) -> ClickhouseConfig: 17 | if "clickhouse_config" not in ctx.obj: 18 | ctx.obj["clickhouse_config"] = ClickhouseConfig.load() 19 | 20 | return ctx.obj["clickhouse_config"] 21 | 22 | 23 | def get_macros(ctx): 24 | return get_clickhouse_config(ctx).macros 25 | 26 | 27 | def get_cluster_name(ctx): 28 | return get_clickhouse_config(ctx).cluster_name 29 | 
class ClickhouseConfig:
    """
    ClickHouse server config (config.xml).

    Wraps the loaded config mapping; `preprocessed` records whether it was
    read from the preprocessed config file.
    """

    def __init__(self, config, preprocessed):
        self._config = config
        self.preprocessed = preprocessed

    @property
    def _config_root(self) -> dict:
        # The settings live under the first value of the loaded mapping.
        return first_value(self._config)

    @property
    def macros(self):
        """
        ClickHouse macros, without the entries whose key starts with "@".
        """
        section = self._config_root.get("macros", {})
        return {
            name: value for name, value in section.items() if not name.startswith("@")
        }

    @property
    def cluster_name(self):
        """
        Value of the "cluster" macro; KeyError if it is not defined.
        """
        return self.macros["cluster"]

    @property
    def zookeeper(self) -> ClickhouseZookeeperConfig:
        """
        ZooKeeper configuration section (empty when absent).
        """
        section = self._config_root.get("zookeeper", {})
        return ClickhouseZookeeperConfig(section)

    @property
    def storage_configuration(self) -> ClickhouseStorageConfiguration:
        """
        Storage configuration section (empty when absent).
        """
        section = self._config_root.get("storage_configuration", {})
        return ClickhouseStorageConfiguration(section)

    @property
    def ports(self) -> Dict[ClickhousePort, int]:
        """
        Configured ports keyed by port kind; unset or empty settings are omitted.
        """
        port_settings = (
            ("tcp_port", ClickhousePort.TCP),
            ("tcp_port_secure", ClickhousePort.TCP_SECURE),
            ("http_port", ClickhousePort.HTTP),
            ("https_port", ClickhousePort.HTTPS),
        )
        root = self._config_root
        configured = ((port, root.get(name)) for name, port in port_settings)
        return {port: int(raw) for port, raw in configured if raw}

    @property
    def cert_path(self):
        """
        Value of openSSL/server/caConfig, or the default certificate path.
        """
        server_section = self._config_root.get("openSSL", {}).get("server", {})
        return server_section.get("caConfig", CLICKHOUSE_CERT_PATH_DEFAULT)

    def dump(self, mask_secrets=True):
        """
        Dump the config via `dump_config`.
        """
        return dump_config(self._config, mask_secrets=mask_secrets)

    def dump_xml(self, mask_secrets=True):
        """
        Dump the config via `dump_config` with the XML format requested.
        """
        return dump_config(self._config, mask_secrets=mask_secrets, xml_format=True)

    @staticmethod
    def load(try_preprocessed=False):
        """
        Load the server config.

        The preprocessed config is used only when `try_preprocessed` is set
        and that file exists; otherwise the regular server config is loaded.
        """
        use_preprocessed = bool(
            try_preprocessed
            and os.path.exists(CLICKHOUSE_SERVER_PREPROCESSED_CONFIG_PATH)
        )
        if use_preprocessed:
            source_path = CLICKHOUSE_SERVER_PREPROCESSED_CONFIG_PATH
        else:
            source_path = CLICKHOUSE_SERVER_CONFIG_PATH
        return ClickhouseConfig(load_config(source_path), preprocessed=use_preprocessed)
class ClickhouseKeeperConfig:
    """
    ClickHouse keeper server config (config.xml).

    Keeps the loaded config mapping together with the path it was read from.
    """

    def __init__(self, config, config_path):
        self._config = config
        self._config_path = config_path

    @property
    def _clickhouse(self):
        # The settings live under the first value of the loaded mapping.
        return first_value(self._config)

    @property
    def _keeper_server(self):
        return self._clickhouse.get("keeper_server", {})

    @property
    def port_pair(self):
        """
        :returns tuple (ClickHouse Keeper port, port is secure)
        If both `tcp_port` and `tcp_port_secure` are present, the secure port
        is returned. If neither is present, the port is 0 and not secure.
        """
        keeper_section = self._keeper_server
        secure_port = keeper_section.get("tcp_port_secure")
        if secure_port is None:
            return int(keeper_section.get("tcp_port", 0)), False
        return int(secure_port), True

    @property
    def tls_cert_path(self):
        """
        Value of openSSL/server/certificateFile, or None when it is not set.
        """
        server_section = self._clickhouse.get("openSSL", {}).get("server", {})
        return server_section.get("certificateFile", None)

    @property
    def snapshots_dir(self):
        """
        Value of the keeper `snapshot_storage_path` setting, or None.
        """
        return self._keeper_server.get("snapshot_storage_path")

    @property
    def storage_dir(self):
        """
        Value of the keeper `storage_path` setting, or None.
        """
        return self._keeper_server.get("storage_path")

    @property
    def separated(self):
        """
        Return True if the config was read from the dedicated ClickHouse Keeper
        config path, i.e. Keeper is configured to run in a separate process.
        """
        return self._config_path == CLICKHOUSE_KEEPER_CONFIG_PATH

    def dump(self, mask_secrets=True):
        """
        Dump the config via `dump_config`.
        """
        return dump_config(self._config, mask_secrets=mask_secrets)

    def dump_xml(self, mask_secrets=True):
        """
        Dump the config via `dump_config` with the XML format requested.
        """
        return dump_config(self._config, mask_secrets=mask_secrets, xml_format=True)

    @staticmethod
    def load():
        """
        Load the Keeper config from its dedicated path when that file exists,
        otherwise from the preprocessed ClickHouse server config.
        """
        has_keeper_config = os.path.exists(CLICKHOUSE_KEEPER_CONFIG_PATH)
        config_path = (
            CLICKHOUSE_KEEPER_CONFIG_PATH
            if has_keeper_config
            else CLICKHOUSE_SERVER_PREPROCESSED_CONFIG_PATH
        )
        return ClickhouseKeeperConfig(load_config(config_path), config_path)
class ClickhouseStorageConfiguration: 18 | """ 19 | Storage configuration section of ClickHouse server config. 20 | """ 21 | 22 | def __init__(self, config: dict) -> None: 23 | self._config = config 24 | 25 | def has_disk(self, name: str) -> bool: 26 | return name in self._config.get("disks", {}) 27 | 28 | def s3_disk_configuration( 29 | self, name: str, bucket_name_prefix: str 30 | ) -> S3DiskConfiguration: 31 | if not self.has_disk(name): 32 | raise RuntimeError(f"Config section for disk '{name}' is not found") 33 | 34 | disk = self._config["disks"][name] 35 | 36 | if disk["type"] != "s3": 37 | raise TypeError(f"Unsupported object storage type {disk['type']}") 38 | 39 | access_key_id = disk["access_key_id"] 40 | secret_access_key = disk["secret_access_key"] 41 | endpoint: str = disk["endpoint"] 42 | 43 | _host, bucket_name, prefix, endpoint_url = _parse_endpoint( 44 | endpoint, bucket_name_prefix 45 | ) 46 | 47 | return S3DiskConfiguration( 48 | name=name, 49 | endpoint_url=endpoint_url, 50 | access_key_id=access_key_id, 51 | secret_access_key=secret_access_key, 52 | bucket_name=bucket_name, 53 | prefix=prefix, 54 | ) 55 | 56 | def get_disk_config(self, disk: str) -> dict: 57 | return (self._config.get("disks", {})).get(disk, {}) 58 | 59 | 60 | def _parse_endpoint(endpoint: str, bucket_name_prefix: str) -> tuple: 61 | """ 62 | Parse both virtual and path style S3 endpoints url. 
63 | """ 64 | url = urlparse(endpoint) 65 | if url.hostname is None: 66 | raise ValueError(f"Incorrect endpoint format {endpoint}") 67 | 68 | path = url.path[1:] if url.path.startswith("/") else url.path 69 | if url.hostname.startswith(bucket_name_prefix): 70 | # virtual addressing style 71 | bucket_name, host = url.hostname.split(".", maxsplit=1) 72 | prefix = path 73 | else: 74 | # path addressing style 75 | host = url.hostname 76 | bucket_name, prefix = path.split("/", maxsplit=1) 77 | if not bucket_name.startswith(bucket_name_prefix): 78 | raise ValueError( 79 | f"Unexpected bucket name `{bucket_name}`. Parser expects `{bucket_name_prefix}` prefix" 80 | ) 81 | 82 | endpoint_url = f"{url.scheme}://{host}" 83 | if url.port: 84 | endpoint_url += f":{url.port}" 85 | 86 | return host, bucket_name, prefix, endpoint_url 87 | -------------------------------------------------------------------------------- /ch_tools/common/clickhouse/config/users.py: -------------------------------------------------------------------------------- 1 | from .path import CLICKHOUSE_USERS_CONFIG_PATH 2 | from .utils import dump_config, load_config 3 | 4 | 5 | class ClickhouseUsersConfig: 6 | """ 7 | ClickHouse users config (users.xml). 
def load_config(config_path, configd_dir="config.d"):
    """
    Load ClickHouse config file.

    The main file is parsed first, then every regular file found in the
    `configd_dir` directory next to it is merged on top, and finally
    `@incl` directives are resolved against the file named by `include_from`.
    """
    # Load main config file.
    config = _load_config(config_path)

    # Load config files from config.d/ directory.
    configd_path = os.path.join(os.path.dirname(config_path), configd_dir)
    if os.path.isdir(configd_path):
        # os.listdir() returns entries in arbitrary order; sort them so that
        # the override order is deterministic (later names win).
        for file in sorted(os.listdir(configd_path)):
            file_path = os.path.join(configd_path, file)
            if not os.path.isfile(file_path):
                # Sub-directories cannot be opened as config files.
                continue
            _merge_configs(config, _load_config(file_path))

    # Process includes.
    root_section = first_value(config)
    include_file = root_section.get("include_from")
    if include_file:
        include_config = first_value(_load_config(include_file))
        _apply_config_directives(root_section, include_config)

    return config


def dump_config(config, *, mask_secrets=True, xml_format=False):
    """
    Dump ClickHouse config.

    Works on a deep copy, so `config` itself is never modified. Returns the
    copied mapping, or an XML string when `xml_format` is set.
    """
    result = deepcopy(config)

    if mask_secrets:
        _mask_secrets(result)

    if xml_format:
        result = xmltodict.unparse(result, pretty=True)

    return result


def _load_config(config_path):
    """
    Parse a single XML file into a nested mapping.
    """
    with open(config_path, "r", encoding="utf-8") as file:
        return xmltodict.parse(file.read())


def _merge_configs(main_config, additional_config):
    """
    Recursively merge `additional_config` into `main_config` in place.

    Nested mappings are merged key by key; any other non-None value replaces
    the existing one. A None value never overwrites an existing key.
    """
    for key, value in additional_config.items():
        if key not in main_config:
            main_config[key] = value
            continue

        if isinstance(main_config[key], dict) and isinstance(value, dict):
            _merge_configs(main_config[key], value)
            continue

        if value is not None:
            main_config[key] = value


def _apply_config_directives(config_section, include_config):
    """
    Replace every sub-section carrying an `@incl` attribute with the entry of
    `include_config` it names; other mapping sub-sections are processed
    recursively.
    """
    for key, item in config_section.items():
        if not isinstance(item, dict):
            continue

        include = item.get("@incl")
        if include:
            config_section[key] = include_config[include]
            continue

        _apply_config_directives(item, include_config)


def _mask_secrets(config):
    """
    Replace secret values with "*****" in place.

    Mappings are walked recursively. Lists are walked too, because repeated
    XML elements are parsed into lists and may hold secrets of their own.
    """
    if isinstance(config, list):
        for item in config:
            _mask_secrets(item)
        return

    if not isinstance(config, MutableMapping):
        return

    for key, value in list(config.items()):
        if isinstance(value, MutableMapping):
            _mask_secrets(value)
        elif key in ("password", "secret_access_key", "header", "identity"):
            config[key] = "*****"
        elif isinstance(value, list):
            _mask_secrets(value)
7 | """ 8 | 9 | def __init__(self, config: dict) -> None: 10 | self._config = config 11 | 12 | @property 13 | def nodes(self) -> list: 14 | value = self._config["node"] 15 | if isinstance(value, list): 16 | return value 17 | 18 | return [value] 19 | 20 | @property 21 | def root(self) -> Optional[Any]: 22 | return self._config.get("root") 23 | 24 | @property 25 | def identity(self) -> Optional[Any]: 26 | return self._config.get("identity") 27 | -------------------------------------------------------------------------------- /ch_tools/common/dbaas.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 4 | class DbaasConfig: 5 | def __init__(self, config): 6 | self._config = config 7 | 8 | @property 9 | def vtype(self): 10 | return self._config["vtype"] 11 | 12 | @property 13 | def cloud_id(self): 14 | return self._config["cloud"]["cloud_ext_id"] 15 | 16 | @property 17 | def folder_id(self): 18 | return self._config["folder"]["folder_ext_id"] 19 | 20 | @property 21 | def cluster_id(self): 22 | return self._config["cluster_id"] 23 | 24 | @property 25 | def cluster_name(self): 26 | return self._config["cluster_name"] 27 | 28 | @property 29 | def created_at(self): 30 | return self._config["created_at"] 31 | 32 | @property 33 | def shard_count(self): 34 | subcluster = self._clickhouse_subcluster() 35 | return len(subcluster["shards"]) 36 | 37 | @property 38 | def host_count(self): 39 | return len(self._config["cluster_hosts"]) 40 | 41 | @property 42 | def clickhouse_host_count(self): 43 | subcluster = self._clickhouse_subcluster() 44 | count = 0 45 | for shard in subcluster["shards"].values(): 46 | count += len(shard["hosts"]) 47 | return count 48 | 49 | @property 50 | def shard_hosts(self): 51 | return self._config["shard_hosts"] 52 | 53 | @property 54 | def replicas(self): 55 | return [host for host in self.shard_hosts if host != self.fqdn] 56 | 57 | @property 58 | def fqdn(self): 59 | return self._config["fqdn"] 60 
@dataclass
class WorkerTask:
    """
    A unit of work for `execute_tasks_in_parallel`.
    """

    # Key under which the task result is stored in the returned dict.
    indeifier: str
    # Callable to run; invoked as `function(**kwargs)`.
    function: Callable
    kwargs: Dict[str, Any]


def execute_tasks_in_parallel(
    tasks: List[WorkerTask], max_workers: int = 4, keep_going: bool = False
) -> Dict[str, Any]:
    """
    Run all tasks in a thread pool and collect their results.

    :param tasks: tasks to execute.
    :param max_workers: size of the thread pool.
    :param keep_going: when True, a failing task is logged and skipped;
        when False, its exception is re-raised.
    :returns dict mapping task identifier to the value its function returned;
        failed tasks are absent when `keep_going` is set.
    """
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Can't use map function here. The map method returns a generator
        # and it is not possible to resume a generator after an exception occurs.
        # https://peps.python.org/pep-0255/#specification-generators-and-exception-propagation
        futures_to_indedifier = {
            executor.submit(
                task.function,
                **task.kwargs,
            ): task.indeifier
            for task in tasks
        }
        result: Dict[str, Any] = {}
        for future in as_completed(futures_to_indedifier):
            idf = futures_to_indedifier[future]
            try:
                result[idf] = future.result()
            except Exception as e:
                if keep_going:
                    # Log the identifier of the failed task (not the builtin `id`).
                    logging.warning(
                        "Ignoring the exception while executing {} due to keep-going flag: {!r}",
                        idf,
                        e,
                    )
                else:
                    raise
    return result
".join(self.text) 30 | if not message and self.code == 0: 31 | message = "OK" 32 | 33 | return message 34 | 35 | def set_code(self, new_code: int) -> None: 36 | """Set the code if it is greater than the current.""" 37 | if new_code > self.code: 38 | self.code = new_code 39 | 40 | def append(self, new_text: str) -> None: 41 | """Accumulate the status text.""" 42 | self.text.append(new_text) 43 | 44 | def add_verbose(self, new_text: str) -> None: 45 | """Add detail info.""" 46 | self.verbose.append(new_text) 47 | 48 | def report(self, ctx: Context) -> None: 49 | """Output formatted status message.""" 50 | message = self.message 51 | for rule in ctx.obj["config"]["monitoring"]["output"]["escaping_rules"]: 52 | message = re.sub(rule["pattern"], rule["replacement"], message) 53 | 54 | print(f"{self.code};{message}") 55 | if self.verbose: 56 | for v in self.verbose: 57 | if v: 58 | print("\n") 59 | print(v) 60 | -------------------------------------------------------------------------------- /ch_tools/common/type/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Useful types. 3 | """ 4 | 5 | from .typed_enum import IntEnum, StrEnum 6 | 7 | __all__ = [ 8 | "IntEnum", 9 | "StrEnum", 10 | ] 11 | -------------------------------------------------------------------------------- /ch_tools/common/type/typed_enum.py: -------------------------------------------------------------------------------- 1 | """ 2 | Typed enumerations returning their values on `__str__`. 3 | """ 4 | 5 | from enum import Enum 6 | 7 | 8 | class TypedEnum(Enum): 9 | """ 10 | Base class for typed enumerations. 11 | """ 12 | 13 | def __str__(self) -> str: 14 | return str(self.value) 15 | 16 | 17 | class StrEnum(str, TypedEnum): 18 | """ 19 | String-value enumeration. 20 | """ 21 | 22 | pass 23 | 24 | 25 | class IntEnum(int, TypedEnum): 26 | """ 27 | Integer-value enumeration. 
def dict_representer(dumper, data):
    """
    Represent a mapping through its items, keeping their iteration order.
    """
    pairs = data.items()
    return yaml.representer.SafeRepresenter.represent_dict(dumper, pairs)


def str_representer(dumper, data):
    """
    Represent a string, using the literal block style for multi-line values.
    """
    style = "|" if "\n" in data else None
    return yaml.representer.SafeRepresenter.represent_scalar(
        dumper, "tag:yaml.org,2002:str", data, style=style
    )


# Register the custom representers with the default dumper.
yaml.add_representer(dict, dict_representer)
yaml.add_representer(OrderedDict, dict_representer)
yaml.add_representer(str, str_representer)


def load_yaml(file_path):
    """
    Read the YAML document stored at `file_path` ("~" is expanded).
    """
    expanded = os.path.expanduser(file_path)
    with open(expanded, "r", encoding="utf-8") as stream:
        return yaml.safe_load(stream)


def dump_yaml(data, file_path=None):
    """
    Serialize `data` as YAML.

    Without `file_path` the document is returned as a string; otherwise it is
    written to that file ("~" is expanded) and nothing is returned.
    """
    options = {
        "default_flow_style": False,
        "allow_unicode": True,
        "width": sys.maxsize,
    }
    if not file_path:
        return yaml.dump(data, **options)

    with open(os.path.expanduser(file_path), "w", encoding="utf-8") as stream:
        yaml.dump(data, stream, **options)
4 | 5 | It provides monitoring for: 6 | - Backup: 7 | - Validity 8 | - Age 9 | - Count 10 | - Restoration failures 11 | - Presence of orphaned S3 backups 12 | - Core dumps 13 | - Old chunks on Distributed tables 14 | - Presence of geobase 15 | - Aliveness of ClickHouse Keeper 16 | - Count of errors in logs 17 | - Ping-ability of ClickHouse 18 | - Replication lag between replicas 19 | - Re-setup state 20 | - Read-only replicas 21 | - System metrics 22 | - TLS certificate validity 23 | - Size of S3 orphaned objects 24 | 25 | Each monitoring check prints its result in the following format: 26 | ``` 27 | status;message 28 | ``` 29 | Where `status` is one of 30 | - `0` - OK 31 | - `1` - WARN 32 | - `2` - CRIT 33 | -------------------------------------------------------------------------------- /ch_tools/monrun_checks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/ch-tools/93cdec05ba9456e3b6d824ca19cf2d592ba5b003/ch_tools/monrun_checks/__init__.py -------------------------------------------------------------------------------- /ch_tools/monrun_checks/ch_core_dumps.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import time 3 | from datetime import datetime 4 | 5 | import click 6 | 7 | from ch_tools.common.result import Result 8 | 9 | 10 | @click.command("core-dumps") 11 | @click.option( 12 | "-t", 13 | "--core-directory", 14 | "core_directory", 15 | help="Core dump directory.", 16 | ) 17 | @click.option( 18 | "-n", 19 | "--crit-interval-seconds", 20 | "crit_seconds", 21 | type=int, 22 | help="Time interval to check in seconds.", 23 | ) 24 | def core_dumps_command(core_directory, crit_seconds): 25 | """ 26 | Check for core dumps.
@click.command("dist-tables")
@click.option(
    "-c", "--critical", "crit", type=int, default=3600, help="Critical threshold."
)
@click.option(
    "-w", "--warning", "warn", type=int, default=600, help="Warning threshold."
)
@click.pass_context
def dist_tables_command(ctx, crit, warn):
    """
    Check for old chunks on Distributed tables.

    A table with broken chunks raises the status to at least 1. A table whose
    oldest chunk is `warn` seconds old or more raises it to at least 1, and
    to 2 once the age reaches `crit` seconds.
    """

    status = 0
    issues = []

    ch_client = clickhouse_client(ctx)

    query = "SELECT database, name FROM system.tables WHERE engine = 'Distributed'"
    distributed_tables = ch_client.query_json_data(query=query, compact=False)
    for table in distributed_tables:
        table_name = f'{table["database"]}.{table["name"]}'
        tss = get_chunk_timestamps(table)
        if tss["broken"]:
            issues.append(f'{table_name}: {len(tss["broken"])} broken chunks')
            status = max(1, status)

        # Entries are sorted by timestamp, so the first one is the oldest.
        oldest_ts, oldest_fn = tss["root"][0] if tss["root"] else (None, None)
        if not oldest_ts:
            continue
        timespan = int(time.time()) - oldest_ts
        if timespan < warn:
            continue

        if timespan < crit:
            status = max(1, status)
        else:
            status = 2

        # The original `{table["database"],}` formatted a one-element tuple,
        # which rendered as "('db',).table" in the message.
        issues.append(f"{table_name}: {oldest_fn} ({int(timespan)})")

    message = ", ".join(issues)
    return Result(status, message or "OK")


def get_chunk_timestamps(table):
    """
    Return timestamps of files contained within dist table directory.

    :returns dict with keys "broken" and "root", each a list of
        (access time, file name) tuples sorted in ascending order.
    """
    path = pathlib.Path(get_table_path(table))

    patterns = {
        "broken": "*/broken/*",
        "root": "*/*",
    }
    return {
        subdir: sorted(
            [(f.stat().st_atime, f.name) for f in path.glob(pattern) if f.is_file()]
        )
        for subdir, pattern in patterns.items()
    }
@click.command("geobase")
@click.pass_context
def geobase_command(ctx):
    """
    Check that embedded geobase is configured.

    Resolves region 1 through `regionToName` and compares the answer with the
    expected name; a mismatch or an HTTP error is reported as CRIT.
    """
    expected = "Москва и Московская область"

    try:
        rows = clickhouse_client(ctx).query_json_data(
            query="SELECT regionToName(CAST(1 AS UInt32))"
        )
        actual = rows[0][0]
    except requests.exceptions.HTTPError as exc:
        return Result(CRIT, repr(exc))

    if actual != expected:
        return Result(
            CRIT, f"Geobase error, expected ({expected}), but got ({actual})"
        )

    return Result(OK)
"--no-verify-ssl-certs", 29 | "no_verify_ssl_certs", 30 | is_flag=True, 31 | default=False, 32 | help="Allow unverified SSL certificates, e.g. self-signed ones", 33 | ) 34 | def keeper_command(retries: int, timeout: int, no_verify_ssl_certs: bool) -> Result: 35 | """ 36 | Check ClickHouse Keeper is alive. 37 | """ 38 | keeper_port, use_ssl = ClickhouseKeeperConfig.load().port_pair 39 | if not keeper_port: 40 | return Result(OK, "Disabled") 41 | 42 | client = KazooClient( 43 | f"127.0.0.1:{keeper_port}", 44 | connection_retry=retries, 45 | command_retry=retries, 46 | timeout=timeout, 47 | use_ssl=use_ssl, 48 | verify_certs=not no_verify_ssl_certs, 49 | ) 50 | try: 51 | client.start() 52 | client.get("/") 53 | client.stop() 54 | except (KazooException, KazooTimeoutError) as e: 55 | return Result(CRIT, repr(e)) 56 | 57 | return Result(OK) 58 | -------------------------------------------------------------------------------- /ch_tools/monrun_checks/ch_log_errors.py: -------------------------------------------------------------------------------- 1 | import re 2 | from datetime import datetime, timedelta 3 | 4 | import click 5 | from file_read_backwards import FileReadBackwards 6 | 7 | from ch_tools.common.cli.parameters import RegexpParamType 8 | from ch_tools.common.result import CRIT, OK, WARNING, Result 9 | 10 | REGEXP = re.compile( 11 | r"^([0-9]{4}\.[0-9]{2}\.[0-9]{2}\ [0-9]{2}\:[0-9]{2}\:[0-9]{2}).*?<(Error|Fatal)>" 12 | ) 13 | 14 | 15 | @click.command("log-errors") 16 | @click.option("-c", "--critical", "crit", type=int, help="Critical threshold.") 17 | @click.option("-w", "--warning", "warn", type=int, help="Warning threshold.") 18 | @click.option( 19 | "-n", 20 | "--watch-seconds", 21 | "watch_seconds", 22 | type=int, 23 | help="Watch seconds.", 24 | ) 25 | @click.option( 26 | "-e", 27 | "--exclude", 28 | "exclude", 29 | type=RegexpParamType(), 30 | help="Excluded error.", 31 | ) 32 | @click.option( 33 | "-f", 34 | "--logfile", 35 | "logfile", 36 | help="Log 
file path.", 37 | ) 38 | def log_errors_command(crit, warn, watch_seconds, exclude, logfile): 39 | """ 40 | Check errors in ClickHouse server logs. 41 | """ 42 | datetime_start = datetime.now() - timedelta(seconds=watch_seconds) 43 | errors = 0 44 | 45 | with FileReadBackwards(logfile, encoding="utf-8") as f: 46 | for line in f: 47 | if exclude.search(line): 48 | continue 49 | match = REGEXP.match(line) 50 | if match is None: 51 | continue 52 | date = match.group(1) 53 | if datetime.strptime(date, "%Y.%m.%d %H:%M:%S") < datetime_start: 54 | break 55 | errors += 1 56 | 57 | msg = f"{errors} errors for last {watch_seconds} seconds" 58 | if errors >= crit: 59 | return Result(CRIT, msg) 60 | if errors >= warn: 61 | return Result(WARNING, msg) 62 | return Result(OK, f"OK, {msg}") 63 | -------------------------------------------------------------------------------- /ch_tools/monrun_checks/ch_replication_lag.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from ch_tools.common.commands.replication_lag import estimate_replication_lag 4 | 5 | 6 | @click.command("replication-lag") 7 | @click.option( 8 | "-x", 9 | "--exec-critical", 10 | "xcrit", 11 | type=int, 12 | help="Critical threshold for one task execution.", 13 | ) 14 | @click.option( 15 | "-c", 16 | "--critical", 17 | "crit", 18 | type=int, 19 | help="Critical threshold for lag with errors.", 20 | ) 21 | @click.option("-w", "--warning", "warn", type=int, help="Warning threshold.") 22 | @click.option( 23 | "-M", 24 | "--merges-critical", 25 | "mcrit", 26 | type=click.FloatRange(0.0, 100.0), 27 | help="Critical threshold in percent of max_replicated_merges_in_queue.", 28 | ) 29 | @click.option( 30 | "-m", 31 | "--merges-warning", 32 | "mwarn", 33 | type=click.FloatRange(0.0, 100.0), 34 | help="Warning threshold in percent of max_replicated_merges_in_queue.", 35 | ) 36 | @click.option( 37 | "-v", 38 | "--verbose", 39 | "verbose", 40 | type=int, 41 | 
@click.command("resetup-state")
@click.option("-p", "--port", "port", type=int, help="ClickHouse HTTP(S) port to use.")
@click.option("-s", "--ssl", "ssl", is_flag=True, help="Use HTTPS rather than HTTP.")
@click.option("--ca_bundle", "ca_bundle", help="Path to CA bundle to use.")
@click.pass_context
def resetup_state_command(ctx, port, ssl, ca_bundle):
    """
    Check state of resetup process.

    Runs the three preliminary checks first, then reports CRIT if ClickHouse
    answers on the given port or if the resetup config file is present.
    """

    check_repsync_running()
    check_resetup_running()
    check_resetup_required()

    host = socket.getfqdn()
    if request(ctx, host, port, ssl, ca_bundle):
        return Result(CRIT, "ClickHouse is listening on ports reserved for resetup")

    if os.path.isfile(CLICKHOUSE_RESETUP_CONFIG_PATH):
        return Result(CRIT, "Detected resetup config, but ch-backup is not running")

    return Result(OK)


def check_resetup_running():
    """
    Check for currently running `ch-backup restore-schema`
    """
    # Pre-fetching "cmdline" lets psutil handle processes that exit or deny
    # access while being inspected, instead of raising from proc.cmdline().
    for proc in psutil.process_iter(["cmdline"]):
        cmdline = proc.info["cmdline"] or ()
        if {"/usr/bin/ch-backup", "restore-schema"}.issubset(cmdline):
            die(0, "resetup is running")


def check_repsync_running():
    """
    Check for currently running ch_wait_replication_sync.py script
    """
    # See check_resetup_running() for why "cmdline" is pre-fetched.
    for proc in psutil.process_iter(["cmdline"]):
        cmdline = proc.info["cmdline"] or ()
        if {"/usr/local/yandex/ch_wait_replication_sync.py"}.issubset(cmdline):
            die(0, "resetup is running (wait for replication sync)")


def check_resetup_required():
    """
    Check resetup conditions

    Calls the `mdb_clickhouse.resetup_required` salt function locally and
    finishes the check with "OK" when it returns a truthy value.
    """
    cmd = [
        "sudo",
        "salt-call",
        "mdb_clickhouse.resetup_required",
        "--out",
        "json",
        "--local",
    ]
    output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
    if json.loads(output)["local"]:
        die(0, "OK")
@click.command("ro-replica")
@click.option(
    "-v",
    "--verbose",
    is_flag=True,
    help="Show details about ro tables.",
)
@click.pass_context
def ro_replica_command(ctx, verbose=False):
    """
    Check for readonly replicated tables.

    Reports CRIT listing every read-only table; with `verbose` the per-table
    details are attached as the verbose part of the result.
    """
    query = """
    SELECT database, table, replica_path, last_queue_update_exception, zookeeper_exception
    FROM system.replicas WHERE is_readonly
    """
    rows = clickhouse_client(ctx).query_json_data(query, compact=False)
    if not rows:
        return Result(OK)

    details = ""
    if verbose:
        columns = [
            "database",
            "table",
            "replica_path",
            "last_queue_update_exception",
            "zookeeper_exception",
        ]
        # One "column: value" paragraph per table, separated by blank lines.
        paragraphs = [
            "\n".join(f"{column}: {row[column]}" for column in columns if column in row)
            for row in rows
        ]
        details = "\n\n".join(paragraphs)

    tables_str = ", ".join(f"{row['database']}.{row['table']}" for row in rows)

    return Result(CRIT, f"Readonly replica tables: {tables_str}", verbose=details)
| from ch_tools.common.result import OK, WARNING, Result 5 | 6 | 7 | @click.command("orphaned-backups") 8 | def orphaned_backups_command(): 9 | """ 10 | Check for orphaned backups. 11 | """ 12 | orphaned_backups = get_orphaned_chs3_backups() 13 | if not orphaned_backups: 14 | return Result(OK) 15 | 16 | orphaned_backups_str = ", ".join(orphaned_backups[:3]) 17 | if len(orphaned_backups) > 3: 18 | orphaned_backups_str += ", ..." 19 | 20 | return Result( 21 | WARNING, 22 | f"There are {len(orphaned_backups)} orphaned S3 backups: {orphaned_backups_str}", 23 | ) 24 | -------------------------------------------------------------------------------- /ch_tools/monrun_checks/ch_s3_credentials_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | from datetime import timedelta 4 | 5 | import requests 6 | from click import pass_context 7 | from cloup import command, option 8 | 9 | from ch_tools.common import logging 10 | from ch_tools.common.clickhouse.config.path import ( 11 | CLICKHOUSE_RESETUP_CONFIG_PATH, 12 | CLICKHOUSE_S3_CREDENTIALS_CONFIG_PATH, 13 | ) 14 | from ch_tools.common.result import CRIT, OK, WARNING, Result 15 | 16 | 17 | @command("s3-credentials-config") 18 | @option( 19 | "-p", 20 | "--present/--missing", 21 | default=False, 22 | is_flag=True, 23 | help="Whether S3 credentials config should be present or not.", 24 | ) 25 | @pass_context 26 | def s3_credentials_configs_command(ctx, present): 27 | """ 28 | Check S3 credentials config. 
def _request_token(metadata_service_endpoint):
    """
    Request the default service account token from the metadata service.

    Performs a GET against the given endpoint with the "Metadata-Flavor"
    header and a 60 second timeout, and returns the response object as is
    (the caller inspects its status code).
    """
    token_url = f"{metadata_service_endpoint}/computeMetadata/v1/instance/service-accounts/default/token"
    request_headers = {"Metadata-Flavor": "Google"}
    return requests.get(token_url, headers=request_headers, timeout=60)
@command("system-queues")
@option("--merges-in-queue-warn", "merges_in_queue_warn", type=int)
@option("--merges-in-queue-crit", "merges_in_queue_crit", type=int)
@option("--future-parts-warn", "future_parts_warn", type=int)
@option("--future-parts-crit", "future_parts_crit", type=int)
@option("--parts-to-check-warn", "parts_to_check_warn", type=int)
@option("--parts-to-check-crit", "parts_to_check_crit", type=int)
@option("--queue-size-warn", "queue_size_warn", type=int)
@option("--queue-size-crit", "queue_size_crit", type=int)
@option("--inserts-in-queue-warn", "inserts_in_queue_warn", type=int)
@option("--inserts-in-queue-crit", "inserts_in_queue_crit", type=int)
@pass_context
def system_queues_command(
    ctx,
    merges_in_queue_warn,
    merges_in_queue_crit,
    future_parts_warn,
    future_parts_crit,
    parts_to_check_warn,
    parts_to_check_crit,
    queue_size_warn,
    queue_size_crit,
    inserts_in_queue_warn,
    inserts_in_queue_crit,
):
    """
    Check system queues.

    For every row of system.replicas, each queue metric is compared with its
    warning and critical thresholds. Returns OK when nothing is exceeded;
    otherwise the result carries the most severe status found and a message
    listing every exceeded threshold (most severe first).

    A threshold that was not supplied (None) is skipped instead of being
    compared, because comparing an int with None raises TypeError.
    """
    thresholds = [
        ("merges_in_queue", merges_in_queue_warn, merges_in_queue_crit),
        ("future_parts", future_parts_warn, future_parts_crit),
        ("parts_to_check", parts_to_check_warn, parts_to_check_crit),
        ("queue_size", queue_size_warn, queue_size_crit),
        ("inserts_in_queue", inserts_in_queue_warn, inserts_in_queue_crit),
    ]

    issues = []
    for item in _get_metrics(ctx):
        table_full_name = f"{item['database']}.{item['table']}"
        for parameter, warn, crit in thresholds:
            value = item[parameter]
            # The critical level wins over the warning level for the same metric.
            if crit is not None and value > crit:
                issues.append(
                    (
                        CRIT,
                        f"{table_full_name}: {parameter} {value} > {crit} (crit);",
                    )
                )
            elif warn is not None and value > warn:
                issues.append(
                    (
                        WARNING,
                        f"{table_full_name}: {parameter} {value} > {warn} (warn);",
                    )
                )

    if issues:
        # Sort by status, highest first, so the first entry defines the overall status.
        issues.sort(reverse=True, key=lambda x: x[0])
        status = issues[0][0]
        message = " ".join(x[1] for x in issues)
        return Result(status, message)

    return Result(OK)


def _get_metrics(ctx):
    """
    Select and return metrics from system.replicas.

    Each returned row holds the database and table names together with the
    future_parts, parts_to_check, queue_size, inserts_in_queue and
    merges_in_queue columns.
    """
    query = (
        "SELECT database, table, future_parts, parts_to_check, queue_size,"
        " inserts_in_queue, merges_in_queue FROM system.replicas"
    )
    return clickhouse_client(ctx).query_json_data(query=query, compact=False)
def get_ports(ctx: click.Context, ports: Optional[str]) -> List[str]:
    """
    Resolve the list of ports whose certificates should be checked.

    When a comma separated string is given it is split and returned as is.
    Otherwise the ClickHouse client is asked about the HTTPS and TCP_SECURE
    ports, and those that pass check_port() are returned as strings.
    """
    if ports:
        return ports.split(",")

    client = clickhouse_client(ctx)
    return [
        str(client.get_port(secure_port))
        for secure_port in (ClickhousePort.HTTPS, ClickhousePort.TCP_SECURE)
        if client.check_port(secure_port)
    ]
def translate_to_status(exc: Exception, status: Status) -> Status:
    """
    Fill the status from an exception using the handler registered in EXC_MAP.

    The exception's class hierarchy is walked from the most specific class to
    the most generic one and the first class present in EXC_MAP selects the
    handler. This way subclasses of a registered class (for example the
    concrete errors derived from RequestException) are handled by the same
    handler instead of being reported as unknown errors. When no class in the
    hierarchy is registered, unknown_exception is used.
    """
    for exc_class in type(exc).__mro__:
        handler = EXC_MAP.get(exc_class)
        if handler is not None:
            return handler(exc, status)  # type: ignore
    return unknown_exception(exc, status)
def get_uptime(ctx: Context) -> timedelta:
    """
    Return the uptime reported by the ClickHouse client.

    Any failure while obtaining it is logged as a warning (with traceback)
    and a zero timedelta is returned instead.
    """
    try:
        uptime = clickhouse_client(ctx).get_uptime()
    except Exception:
        logging.warning("Failed to get ClickHouse uptime", exc_info=True)
        uptime = timedelta()
    return uptime
4 | 5 | It provides monitoring for: 6 | - Aliveness of Keeper 7 | - Average latency 8 | - Minimum latency 9 | - Maximum latency 10 | - Request queue size 11 | - Open file descriptors 12 | - Version of Keeper 13 | - Presence of snapshots 14 | - Presence of `NullPointerException` in logs for 24 hours 15 | 16 | Each monitoring check outputs in the following format: 17 | ``` 18 | <status>;<message> 19 | ``` 20 | Where `<status>` is one of 21 | - `0` - OK 22 | - `1` - WARN 23 | - `2` - CRIT 24 | 25 | -------------------------------------------------------------------------------- /ch_tools/monrun_checks_keeper/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/ch-tools/93cdec05ba9456e3b6d824ca19cf2d592ba5b003/ch_tools/monrun_checks_keeper/__init__.py -------------------------------------------------------------------------------- /ch_tools/monrun_checks_keeper/status.py: -------------------------------------------------------------------------------- 1 | import click 2 | import tabulate 3 | 4 | DEFAULT_COLOR = "\033[0m" 5 | 6 | COLOR_MAP = { 7 | 0: "\033[92m", 8 | 1: "\033[93m", 9 | 2: "\033[91m", 10 | } 11 | 12 | 13 | def status_command(commands): 14 | @click.command("status") 15 | @click.pass_context 16 | def status_impl(ctx): 17 | """ 18 | Perform all checks. 
19 | """ 20 | config = ctx.obj["config"]["keeper-monitoring"] 21 | ctx.obj.update({"status_mode": True}) 22 | ctx.default_map = config 23 | 24 | checks_status = [] 25 | for cmd in commands: 26 | if not config.get(cmd.name, {}).get("@disabled"): 27 | status = ctx.invoke(cmd) 28 | checks_status.append( 29 | ( 30 | cmd.name, 31 | f"{COLOR_MAP[status.code]}{status.message}{DEFAULT_COLOR}", 32 | ) 33 | ) 34 | 35 | print(tabulate.tabulate(checks_status)) 36 | 37 | return status_impl 38 | -------------------------------------------------------------------------------- /debian/.gitignore: -------------------------------------------------------------------------------- 1 | *.log 2 | .debhelper/ 3 | clickhouse-tools 4 | -------------------------------------------------------------------------------- /debian/changelog: -------------------------------------------------------------------------------- 1 | clickhouse-tools (1.0.0) UNRELEASED; urgency=low 2 | 3 | * Initial Release. 4 | 5 | -- Dmitry Starov Thu, 01 Jun 2023 16:00:00 +0300 6 | -------------------------------------------------------------------------------- /debian/compat: -------------------------------------------------------------------------------- 1 | 10 2 | -------------------------------------------------------------------------------- /debian/control: -------------------------------------------------------------------------------- 1 | Source: clickhouse-tools 2 | Section: database 3 | Priority: optional 4 | Maintainer: Yandex LLC 5 | Uploaders: Alexander Burmak , 6 | Dmitry Starov , 7 | Anton Ivashkin , 8 | Grigory Pervakov , 9 | Petr Nuzhnov , 10 | Egor Medvedev , 11 | Aleksei Filatov , 12 | Evgenii Kopanev , 13 | Mikhail Kot 14 | Build-Depends: debhelper (>= 10~), python3, python3-venv, python3-pip, python3-setuptools 15 | Standards-Version: 4.1.4 16 | Homepage: https://github.com/yandex/ch-tools 17 | Vcs-Browser: https://github.com/yandex/ch-tools.git 18 | Vcs-Git: git://github.com:yandex/ch-tools.git 19 | 
X-Python3-Version: >= 3.9 20 | 21 | Package: clickhouse-tools 22 | Architecture: any 23 | Description: A set of tools for administration and diagnostics of ClickHouse DBMS. 24 | Depends: ${python:Depends} 25 | Replaces: mdb-ch-tools, ch-tools 26 | Conflicts: mdb-ch-tools, ch-tools 27 | -------------------------------------------------------------------------------- /debian/copyright: -------------------------------------------------------------------------------- 1 | Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ 2 | Upstream-Name: clickhouse-tools 3 | Source: https://github.com/yandex/ch-tools 4 | 5 | Files: * 6 | Copyright: 2023 Yandex LLC 7 | License: MIT 8 | The MIT License (MIT) 9 | Copyright (c) 2023 YANDEX LLC 10 | . 11 | Permission is hereby granted, free of charge, to any person obtaining a copy 12 | of this software and associated documentation files (the "Software"), to deal 13 | in the Software without restriction, including without limitation the rights 14 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 15 | copies of the Software, and to permit persons to whom the Software is 16 | furnished to do so, subject to the following conditions: 17 | . 18 | The above copyright notice and this permission notice shall be included in 19 | all copies or substantial portions of the Software. 20 | . 21 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 22 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 23 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 24 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 25 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 26 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 27 | THE SOFTWARE. 
28 | -------------------------------------------------------------------------------- /debian/rules: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | 3 | PYTHON_MAJOR := $(shell python3 -c 'import sys; print(sys.version_info[0])') 4 | PYTHON_MINOR := $(shell python3 -c 'import sys; print(sys.version_info[1])') 5 | 6 | PYTHON_FROM := $(PYTHON_MAJOR).$(PYTHON_MINOR) 7 | PYTHON_TO := $(PYTHON_MAJOR).$(shell echo $$(( $(PYTHON_MINOR) + 1 ))) 8 | 9 | # Use conditional python3 dependency because package for Bionic requires python3.6, 10 | # but package for Jammy requires python3.10. 11 | # 12 | # All this is due to the fact that we put the entire virtual environment in a deb package 13 | # and that venv links to the system python 14 | SUBSTVARS := -Vpython:Depends="python3 (>= $(PYTHON_FROM)), python3 (<< $(PYTHON_TO))" 15 | 16 | %: 17 | dh $@ 18 | 19 | override_dh_auto_build: 20 | dh_auto_build 21 | 22 | override_dh_gencontrol: 23 | dh_gencontrol -- $(SUBSTVARS) 24 | 25 | override_dh_auto_clean: ; 26 | 27 | override_dh_strip: ; 28 | 29 | override_dh_shlibdeps: ; 30 | 31 | override_dh_auto_test: ; 32 | -------------------------------------------------------------------------------- /debian/source/format: -------------------------------------------------------------------------------- 1 | 3.0 (native) 2 | -------------------------------------------------------------------------------- /resources/completion/ch-monitoring-completion.bash: -------------------------------------------------------------------------------- 1 | # Generated by "_CH_MONITORING_COMPLETE=bash_source ch-monitoring" 2 | _ch_monitoring_completion() { 3 | local IFS=$'\n' 4 | local response 5 | 6 | response=$(env COMP_WORDS="${COMP_WORDS[*]}" COMP_CWORD=$COMP_CWORD _CH_MONITORING_COMPLETE=bash_complete $1) 7 | 8 | for completion in $response; do 9 | IFS=',' read type value <<< "$completion" 10 | 11 | if [[ $type == 'dir' ]]; then 12 | COMREPLY=() 
# Bash completion handler for chadmin.
# Asks the chadmin executable ($1) for completions and translates its
# "type,value" answer lines into the COMPREPLY array read by bash.
_chadmin_completion() {
    local IFS=$'\n'
    local response

    response=$(env COMP_WORDS="${COMP_WORDS[*]}" COMP_CWORD=$COMP_CWORD _CHADMIN_COMPLETE=bash_complete $1)

    for completion in $response; do
        IFS=',' read type value <<< "$completion"

        if [[ $type == 'dir' ]]; then
            # Clear the reply array and let bash complete directory names itself.
            COMPREPLY=()
            compopt -o dirnames
        elif [[ $type == 'file' ]]; then
            # Clear the reply array and fall back to the default (file name) completion.
            COMPREPLY=()
            compopt -o default
        elif [[ $type == 'plain' ]]; then
            COMPREPLY+=($value)
        fi
    done

    return 0
}
def create() -> dict:
    """
    Build the configuration dictionary used by the tests.

    The ClickHouse version is taken from the CLICKHOUSE_VERSION environment
    variable ("latest" when unset); everything else is a fixed description of
    the services, the S3 storage and the generated dbaas.conf contents.
    """
    test_network = "ch_tools_test"

    clickhouse_service = {
        "instances": ["clickhouse01", "clickhouse02"],
        "expose": {
            "http": 8123,
            "clickhouse": 9000,
            "keeper": 2281,
        },
        "depends_on": ["zookeeper"],
        "args": {
            "CLICKHOUSE_VERSION": "${CLICKHOUSE_VERSION:-latest}",
        },
        "db": {
            "user": "reader",
            "password": "reader_password",
        },
    }
    zookeeper_service = {
        "instances": ["zookeeper01"],
        "expose": {"tcp": 2181},
    }
    minio_service = {
        "instances": ["minio01"],
        "expose": {"http": 9000},
    }
    http_mock_service = {
        "instances": ["http_mock01"],
        "expose": {"tcp": 8080},
    }
    service_map: dict = {
        "clickhouse": clickhouse_service,
        "zookeeper": zookeeper_service,
        "minio": minio_service,
        "http_mock": http_mock_service,
    }

    object_storage = {
        "endpoint": "http://minio01:9000",
        "port": 9000,
        "access_secret_key": "test_secret",
        "access_key_id": "test_key",
        "bucket": "cloud-storage-test",
        "boto_config": {
            "addressing_style": "auto",
            "region_name": "us-east-1",
        },
        "container": "minio01",
    }

    configuration = {
        "ch_version": os.environ.get("CLICKHOUSE_VERSION", "latest"),
        "images_dir": "images",
        "staging_dir": "staging",
        "network_name": test_network,
        "s3": object_storage,
        "ch_backup": {
            "encrypt_key": "test_encrypt",
        },
        "services": service_map,
        "dbaas_conf": _dbaas_conf(service_map, test_network),
    }
    return configuration


def _dbaas_conf(services: dict, network_name: str) -> dict:
    """
    Generate dbaas.conf contents.

    Host names are "<instance>.<network_name>": every ClickHouse instance is
    placed into the single shard of the ClickHouse subcluster, and the first
    ZooKeeper instance forms the "zk" subcluster.
    """
    clickhouse_hosts = {
        f"{instance}.{network_name}": {}
        for instance in services["clickhouse"]["instances"]
    }
    first_zookeeper = services["zookeeper"]["instances"][0]
    zookeeper_hosts = {f"{first_zookeeper}.{network_name}": {}}

    clickhouse_subcluster = {
        "roles": ["clickhouse_cluster"],
        "shards": {
            "shard_id1": {
                "name": "shard1",
                "hosts": clickhouse_hosts,
            },
        },
    }
    zookeeper_subcluster = {
        "roles": ["zk"],
        "hosts": zookeeper_hosts,
    }

    return {
        "cluster_id": "cid1",
        "created_at": "2022-01-01T12:00:00.000000+03:00",
        "cluster": {
            "subclusters": {
                "subcid1": clickhouse_subcluster,
                "subcid2": zookeeper_subcluster,
            },
        },
    }
def _parse_version_tag(tags: list, prefix: str) -> Optional[str]:
    """
    Extract the version from the first tag of the form "<prefix>_<version>".

    The version part consists of digits and dots only, and the whole tag has
    to match, so e.g. the prefix "require_version" does not pick up a
    "require_version_less_than_24.11" tag. Returns None when no tag matches.
    """
    # The named group is what match.group("version") below reads.
    tag_pattern = prefix + r"_(?P<version>[\d\.]+)"
    for tag in tags:
        match = re.fullmatch(tag_pattern, tag)
        if match:
            return match.group("version")

    return None
46 | chadmin flamegraph cleanup --trace-type Real 47 | """ 48 | Then it completes successfully 49 | -------------------------------------------------------------------------------- /tests/features/monrun_keeper.feature: -------------------------------------------------------------------------------- 1 | Feature: keeper-monitoring tool 2 | 3 | Background: 4 | Given default configuration 5 | And a working s3 6 | And a working zookeeper 7 | And a working clickhouse on clickhouse01 8 | 9 | Scenario: Check status command not throwing 10 | When we execute command on zookeeper01 11 | """ 12 | keeper-monitoring status 13 | """ 14 | 15 | Scenario: Check Zookeeper alive with keeper monitoring 16 | When we execute command on zookeeper01 17 | """ 18 | keeper-monitoring -n alive 19 | """ 20 | Then we get response 21 | """ 22 | 0;OK 23 | """ 24 | When we execute command on zookeeper01 25 | """ 26 | supervisorctl stop zookeeper 27 | """ 28 | When we execute command on zookeeper01 29 | """ 30 | keeper-monitoring -n alive 31 | """ 32 | Then we get response 33 | """ 34 | 2;KazooTimeoutError('Connection time-out') 35 | """ 36 | 37 | Scenario: Check ZooKeeper version 38 | When we execute command on zookeeper01 39 | """ 40 | keeper-monitoring version 41 | """ 42 | Then we get response contains 43 | """ 44 | 0; 45 | """ 46 | When we execute command on zookeeper01 47 | """ 48 | supervisorctl stop zookeeper 49 | """ 50 | When we execute command on zookeeper01 51 | """ 52 | keeper-monitoring version 53 | """ 54 | Then we get response 55 | """ 56 | 1;ConnectionRefusedError(111, 'Connection refused') 57 | """ 58 | 59 | Scenario: Check CH keeper alive with keeper monitoring 60 | Given a working keeper on clickhouse01 61 | When we execute command on clickhouse01 62 | """ 63 | keeper-monitoring -n alive 64 | """ 65 | Then we get response 66 | """ 67 | 0;OK 68 | """ 69 | When we execute command on clickhouse01 70 | """ 71 | supervisorctl stop clickhouse-server 72 | """ 73 | When we execute 
command on clickhouse01 74 | """ 75 | keeper-monitoring -n alive 76 | """ 77 | Then we get response 78 | """ 79 | 2;KazooTimeoutError('Connection time-out') 80 | """ 81 | # check that keeper-monitoring works fine without CH configs 82 | When we execute command on clickhouse01 83 | """ 84 | rm -fr /etc/clickhouse* 85 | keeper-monitoring -n alive 86 | """ 87 | Then we get response 88 | """ 89 | 2;KazooTimeoutError('Connection time-out') 90 | """ 91 | -------------------------------------------------------------------------------- /tests/features/s3_credentials.feature: -------------------------------------------------------------------------------- 1 | Feature: ch_s3_credentials tool 2 | 3 | Background: 4 | Given default configuration 5 | And a working s3 6 | And a working zookeeper 7 | And a working clickhouse on clickhouse01 8 | And a working clickhouse on clickhouse02 9 | And a working http server 10 | 11 | Scenario Outline: chadmin s3 check work correctly 12 | When we execute command on clickhouse01 13 | """ 14 | ch-monitoring --setting cloud.metadata_service_endpoint http://http_mock01:8080 s3-credentials-config --missing 15 | """ 16 | Then we get response 17 | """ 18 | 0;OK 19 | """ 20 | When we execute command on clickhouse01 21 | """ 22 | chadmin --setting cloud.metadata_service_endpoint http://http_mock01:8080 s3-credentials-config update --endpoint=storage.com 23 | """ 24 | And we execute command on clickhouse01 25 | """ 26 | ch-monitoring --setting cloud.metadata_service_endpoint http://http_mock01:8080 s3-credentials-config --present 27 | """ 28 | Then we get response 29 | """ 30 | 0;OK 31 | """ 32 | When we execute command on clickhouse01 33 | """ 34 | cat /etc/clickhouse-server/config.d/s3_credentials.xml 35 | """ 36 | Then we get response 37 | """ 38 | 39 | 40 | 41 | 42 | storage.com 43 | <>X-YaCloud-SubjectToken: IAM_TOKEN> 44 | 45 | 46 | 47 | """ 48 | @require_version_24.11 49 | Examples: 50 | | header_tag_name | 51 | | access_header | 52 | 53 | 
@require_version_less_than_24.11 54 | Examples: 55 | | header_tag_name | 56 | | header | 57 | 58 | Scenario: Offline token update. 59 | Given installed clickhouse-tools config with version on clickhouse01 60 | When we execute command on clickhouse01 61 | """ 62 | ch-monitoring --setting cloud.metadata_service_endpoint http://http_mock01:8080 s3-credentials-config --missing 63 | """ 64 | When we execute command on clickhouse01 65 | """ 66 | supervisorctl stop clickhouse-server 67 | """ 68 | When we execute command on clickhouse01 69 | """ 70 | chadmin --setting cloud.metadata_service_endpoint http://http_mock01:8080 s3-credentials-config update --endpoint=storage.com 71 | """ 72 | And we execute command on clickhouse01 73 | """ 74 | ch-monitoring --setting cloud.metadata_service_endpoint http://http_mock01:8080 s3-credentials-config --present 75 | """ 76 | Then we get response 77 | """ 78 | 0;OK 79 | """ 80 | -------------------------------------------------------------------------------- /tests/images/clickhouse/config/ch-backup.conf: -------------------------------------------------------------------------------- 1 | backup: 2 | path_root: ch_backup/ 3 | deduplicate_parts: True 4 | retain_time: 5 | days: 1 6 | retain_count: 1 7 | deduplication_age_limit: 8 | days: 1 9 | 10 | main: 11 | ca_bundle: [] 12 | 13 | encryption: 14 | type: nacl 15 | key: {{ conf.ch_backup.encrypt_key }} 16 | 17 | storage: 18 | type: s3 19 | credentials: 20 | endpoint_url: '{{ conf.s3.endpoint }}' 21 | access_key_id: {{ conf.s3.access_key_id }} 22 | secret_access_key: {{ conf.s3.access_secret_key }} 23 | bucket: {{ conf.s3.bucket }} 24 | 25 | zookeeper: 26 | hosts: 'zookeeper01:2181' 27 | root_path: '/' 28 | -------------------------------------------------------------------------------- /tests/images/clickhouse/config/clickhouse-keyring.gpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yandex/ch-tools/93cdec05ba9456e3b6d824ca19cf2d592ba5b003/tests/images/clickhouse/config/clickhouse-keyring.gpg -------------------------------------------------------------------------------- /tests/images/clickhouse/config/config.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 1 4 | 5 | 0.0.0.0 6 | 8443 7 | 9440 8 | 9 | 10 | /etc/clickhouse-server/ssl/server.crt 11 | /etc/clickhouse-server/ssl/server.key 12 | 13 | none 14 | true 15 | true 16 | sslv2,sslv3 17 | true 18 | 19 | 20 | 21 | 22 | 23 | true 24 | {% for instance in conf.services.clickhouse.instances %} 25 | 26 | {{ instance }} 27 | 9000 28 | 29 | {% endfor %} 30 | 31 | 32 | 33 | 34 | 35 | zookeeper01 36 | 2181 37 | 38 | 3000 39 | 40 | 41 | 42 | 43 | 44 | s3 45 | {{ conf.s3.endpoint }}/{{ conf.s3.bucket }}/data/ 46 | {{ conf.s3.access_key_id }} 47 | {{ conf.s3.access_secret_key }} 48 | 49 | 50 | 51 | 52 | 53 | 54 | default 55 | 56 | 57 | 58 | 59 | 60 | 61 | object_storage 62 | 63 | 64 | 65 | 66 | 67 | 68 | 2281 69 | 1 70 | /var/lib/clickhouse/coordination/log 71 | /var/lib/clickhouse/coordination/snapshots 72 | 73 | 5000 74 | trace 75 | 10000 76 | 77 | 78 | 79 | 127.0.0.1 80 | 1 81 | 2888 82 | 83 | 84 | 85 | 86 | {{ instance_name }}.{{ conf.network_name }} 87 | cluster 88 | shard1 89 | 90 | 91 | -------------------------------------------------------------------------------- /tests/images/clickhouse/config/dbaas.conf: -------------------------------------------------------------------------------- 1 | {{ conf.dbaas_conf | json }} 2 | -------------------------------------------------------------------------------- /tests/images/clickhouse/config/monitor-ch-backup: -------------------------------------------------------------------------------- 1 | monitor ALL=NOPASSWD: /usr/bin/ch-backup list * 2 | -------------------------------------------------------------------------------- 
/tests/images/clickhouse/config/regions_hierarchy.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/ch-tools/93cdec05ba9456e3b6d824ca19cf2d592ba5b003/tests/images/clickhouse/config/regions_hierarchy.txt -------------------------------------------------------------------------------- /tests/images/clickhouse/config/regions_names_ru.txt: -------------------------------------------------------------------------------- 1 | 1 Москва и Московская область 2 | -------------------------------------------------------------------------------- /tests/images/clickhouse/config/supervisor/conf.d/clickhouse-server.conf: -------------------------------------------------------------------------------- 1 | [program:clickhouse-server] 2 | command=/usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml 3 | process_name=%(program_name)s 4 | autostart=true 5 | autorestart=true 6 | stopsignal=TERM 7 | user=clickhouse 8 | stdout_logfile=/dev/stderr 9 | stdout_logfile_maxbytes=0 10 | stderr_logfile=/dev/stderr 11 | stderr_logfile_maxbytes=0 -------------------------------------------------------------------------------- /tests/images/clickhouse/config/supervisor/conf.d/sshd.conf: -------------------------------------------------------------------------------- 1 | [program:sshd] 2 | command=/usr/sbin/sshd -D 3 | process_name=%(program_name)s 4 | autostart=true 5 | autorestart=true 6 | stopsignal=TERM 7 | user=root 8 | stdout_logfile=/dev/stderr 9 | stdout_logfile_maxbytes=0 10 | stderr_logfile=/dev/stderr 11 | stderr_logfile_maxbytes=0 -------------------------------------------------------------------------------- /tests/images/clickhouse/config/supervisor/supervisord.conf: -------------------------------------------------------------------------------- 1 | [unix_http_server] 2 | file=/var/run/supervisor.sock 3 | chmod=0700 4 | 5 | [supervisord] 6 | logfile=/dev/null 7 | logfile_maxbytes=0 8 | 
pidfile=/var/run/supervisord.pid 9 | minfds=1024 10 | nodaemon=true 11 | 12 | [rpcinterface:supervisor] 13 | supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface 14 | 15 | [supervisorctl] 16 | serverurl=unix:///var/run/supervisor.sock 17 | 18 | [include] 19 | files = /etc/supervisor/conf.d/*.conf 20 | -------------------------------------------------------------------------------- /tests/images/clickhouse/config/users.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | ::/0 8 | 9 | default 10 | 1 11 | 12 | <_monitor> 13 | 14 | 15 | ::/0 16 | 17 | default 18 | 19 | <_admin> 20 | 21 | 22 | ::/0 23 | 24 | default 25 | 1 26 | 27 | 28 | 29 | 30 | 20 31 | 1 32 | 1 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /tests/images/http_mock/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3-alpine 2 | RUN pip3 install flask --no-cache 3 | ENV FLASK_APP=/service.py 4 | COPY tests/images/http_mock/service.py / 5 | CMD ["python3", "-m", "flask", "run", "--host=0.0.0.0", "--port=8080"] 6 | -------------------------------------------------------------------------------- /tests/images/http_mock/service.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from flask import Flask 4 | 5 | app = Flask(__name__) 6 | 7 | 8 | @app.route("/computeMetadata/v1/instance/service-accounts/default/token") 9 | def token(): 10 | return json.dumps( 11 | {"access_token": "IAM_TOKEN", "expires_in": 0, "token_type": "Bearer"} 12 | ) 13 | 14 | 15 | @app.route("/") 16 | def ping(): 17 | return "OK" 18 | -------------------------------------------------------------------------------- /tests/images/minio/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine 2 | 3 | ENV MINIO_ACCESS_KEY={{ 
conf.s3.access_key_id }} 4 | ENV MINIO_SECRET_KEY={{ conf.s3.access_secret_key }} 5 | 6 | RUN apk add --no-cache minio minio-client 7 | COPY tests/images/minio/config/mc.json /root/.mcli/config.json 8 | VOLUME ["/export"] 9 | CMD ["minio", "server", "/export"] 10 | -------------------------------------------------------------------------------- /tests/images/minio/config/mc.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "10", 3 | "aliases": { 4 | "local": { 5 | "url": "http://localhost:9000", 6 | "accessKey": "{{conf.s3.access_key_id}}", 7 | "secretKey": "{{conf.s3.access_secret_key}}", 8 | "api": "S3v4", 9 | "path": "auto" 10 | } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /tests/images/zookeeper/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu/zookeeper 2 | 3 | RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y supervisor python3-pip && \ 4 | rm -rf /var/lib/apt/lists/* /var/cache/debconf && \ 5 | apt-get clean 6 | 7 | COPY tests/images/zookeeper/config/zookeeper.conf /etc/supervisor/supervisord.conf 8 | COPY tests/images/zookeeper/config/zoo.cfg /etc/zookeeper/conf/zoo.cfg 9 | COPY tests/images/zookeeper/config/log4j.properties /etc/zookeeper/conf/log4j.properties 10 | 11 | COPY dist/*.whl / 12 | RUN python3 -m pip install *.whl 13 | 14 | ENTRYPOINT ["supervisord", "-c", "/etc/supervisor/supervisord.conf"] 15 | -------------------------------------------------------------------------------- /tests/images/zookeeper/config/log4j.properties: -------------------------------------------------------------------------------- 1 | # Define some default values that can be overridden by system properties 2 | zookeeper.root.logger=INFO, ROLLINGFILE 3 | 4 | zookeeper.console.threshold=INFO 5 | 6 | zookeeper.log.dir=/var/log/zookeeper/ 7 | zookeeper.log.file=zookeeper.log 8 | 
zookeeper.log.threshold=INFO 9 | zookeeper.log.maxfilesize=256MB 10 | zookeeper.log.maxbackupindex=2 11 | 12 | zookeeper.tracelog.dir=/var/log/zookeeper 13 | zookeeper.tracelog.file=zookeeper_trace.log 14 | 15 | log4j.rootLogger=${zookeeper.root.logger} 16 | 17 | # 18 | # console 19 | # Add "console" to rootlogger above if you want to use this 20 | # 21 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 22 | log4j.appender.CONSOLE.Threshold=${zookeeper.console.threshold} 23 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 24 | log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n 25 | 26 | # 27 | # Add ROLLINGFILE to rootLogger to get log file output 28 | # 29 | log4j.appender.ROLLINGFILE=org.apache.log4j.RollingFileAppender 30 | log4j.appender.ROLLINGFILE.Threshold=${zookeeper.log.threshold} 31 | log4j.appender.ROLLINGFILE.File=${zookeeper.log.dir}/${zookeeper.log.file} 32 | log4j.appender.ROLLINGFILE.MaxFileSize=${zookeeper.log.maxfilesize} 33 | log4j.appender.ROLLINGFILE.MaxBackupIndex=${zookeeper.log.maxbackupindex} 34 | log4j.appender.ROLLINGFILE.layout=org.apache.log4j.PatternLayout 35 | log4j.appender.ROLLINGFILE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n 36 | 37 | # 38 | # Add TRACEFILE to rootLogger to get log file output 39 | # Log TRACE level and above messages to a log file 40 | # 41 | log4j.appender.TRACEFILE=org.apache.log4j.FileAppender 42 | log4j.appender.TRACEFILE.Threshold=TRACE 43 | log4j.appender.TRACEFILE.File=${zookeeper.tracelog.dir}/${zookeeper.tracelog.file} 44 | 45 | log4j.appender.TRACEFILE.layout=org.apache.log4j.PatternLayout 46 | ### Notice we are including log4j's NDC here (%x) 47 | log4j.appender.TRACEFILE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L][%x] - %m%n 48 | 49 | -------------------------------------------------------------------------------- /tests/images/zookeeper/config/start_zk.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Basically, a cannibalized version of ubuntu upstart script 3 | # + contents of /etc/zookeeper/conf/environment 4 | ulimit -n 8196 5 | 6 | NAME=zookeeper 7 | JMXDISABLE="yes, please" 8 | ZOOCFGDIR=/etc/$NAME/conf 9 | 10 | # TODO this is really ugly 11 | # How to find out, which jars are needed? 12 | # seems, that log4j requires the log4j.properties file to be in the classpath 13 | CLASSPATH="$ZOOCFGDIR:/usr/share/java/jline.jar:/usr/share/java/log4j-1.2.jar:/usr/share/java/xercesImpl.jar:/usr/share/java/xmlParserAPIs.jar:/usr/share/java/netty.jar:/usr/share/java/slf4j-api.jar:/usr/share/java/slf4j-log4j12.jar:/usr/share/java/zookeeper.jar" 14 | 15 | EXEC_OVERRIDES="-Dzookeeper.forceSync=no \ 16 | -Djute.maxbuffer=16777216 \ 17 | -Dzookeeper.snapCount=10000" 18 | 19 | ZOOCFG="$ZOOCFGDIR/zoo.cfg" 20 | ZOO_LOG_DIR=/var/log/$NAME 21 | USER=$NAME 22 | GROUP=$NAME 23 | PIDDIR=/var/run/$NAME 24 | PIDFILE=$PIDDIR/$NAME.pid 25 | JAVA=/usr/bin/java 26 | ZOOMAIN="org.apache.zookeeper.server.quorum.QuorumPeerMain" 27 | ZOO_LOG4J_PROP="INFO,ROLLINGFILE" 28 | JMXLOCALONLY="true" 29 | JAVA_OPTS="-XX:+UseG1GC -Xmx256M -XX:+PrintGCDateStamps -Xloggc:/var/log/zookeeper/gc.log -Djava.net.preferIPv6Addresses=true -Djava.net.preferIPv4Stack=false ${EXEC_OVERRIDES}" 30 | 31 | 32 | [ -r "/usr/share/java/zookeeper.jar" ] || exit 0 33 | [ -d $ZOO_LOG_DIR ] || mkdir -p $ZOO_LOG_DIR 34 | chown $USER:$GROUP $ZOO_LOG_DIR 35 | 36 | [ -r /etc/default/zookeeper ] && . /etc/default/zookeeper 37 | if [ -z "$JMXDISABLE" ]; then 38 | JAVA_OPTS="$JAVA_OPTS -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.local.only=$JMXLOCALONLY" 39 | fi 40 | # Set "myid" with a number from hostname: zookeeper01.domain -> 1 41 | myid=$(hostname -f | awk -F '.' 
'{printf("%s", $1)}' | tail -c 1) 42 | datadir=$(awk -F'=' '/dataDir/ {print $2}' "${ZOOCFG}") 43 | [ -z ${myid} ] && exit 1 44 | echo ${myid} > /etc/zookeeper/conf/myid 45 | rm -f ${datadir}/myid 46 | ln -s /etc/zookeeper/conf/myid ${datadir}/myid 47 | 48 | # Start process. ZK`s main class never detaches, so start-stop stays in foreground. 49 | exec start-stop-daemon --start -c $USER --exec $JAVA --name zookeeper -- \ 50 | -cp $CLASSPATH $JAVA_OPTS -Dzookeeper.log.dir=${ZOO_LOG_DIR} \ 51 | -Dzookeeper.root.logger=${ZOO_LOG4J_PROP} $ZOOMAIN $ZOOCFG 52 | -------------------------------------------------------------------------------- /tests/images/zookeeper/config/zoo.cfg: -------------------------------------------------------------------------------- 1 | autopurge.purgeInterval=1 2 | autopurge.snapRetainCount=2 3 | clientPort=2181 4 | dataDir=/var/lib/zookeeper 5 | forceSync=no 6 | fsync.warningthresholdms=500 7 | initLimit=7200 8 | jute.maxbuffer=16777216 9 | leaderServes=yes 10 | maxClientCnxns=2000 11 | cnxTimeout=3000 12 | maxSessionTimeout=60000 13 | quorumListenOnAllIPs=true 14 | skipACL=yes 15 | snapCount=10000 16 | syncLimit=20 17 | tickTime=2000 18 | 4lw.commands.whitelist=* 19 | -------------------------------------------------------------------------------- /tests/images/zookeeper/config/zookeeper.conf: -------------------------------------------------------------------------------- 1 | [supervisord] 2 | logfile=/dev/null 3 | logfile_maxbytes=0 4 | pidfile=/var/run/supervisord.pid 5 | minfds=1024 6 | nodaemon=true 7 | 8 | [unix_http_server] 9 | file=/var/run/supervisor.sock 10 | chmod=0700 11 | 12 | [supervisord] 13 | logfile=/dev/null 14 | logfile_maxbytes=0 15 | pidfile=/var/run/supervisord.pid 16 | minfds=1024 17 | nodaemon=true 18 | 19 | [rpcinterface:supervisor] 20 | supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface 21 | 22 | [supervisorctl] 23 | serverurl=unix:///var/run/supervisor.sock 24 | 25 | 26 | 
[program:zookeeper] 27 | command=bash /opt/kafka/bin/zookeeper-server-start.sh /etc/zookeeper/conf/zoo.cfg 28 | process_name=%(program_name)s 29 | autostart=true 30 | autorestart=true 31 | stopsignal=QUIT 32 | user=root 33 | stdout_logfile=/dev/stderr 34 | stdout_logfile_maxbytes=0 35 | stderr_logfile=/dev/stderr 36 | stderr_logfile_maxbytes=0 37 | -------------------------------------------------------------------------------- /tests/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/ch-tools/93cdec05ba9456e3b6d824ca19cf2d592ba5b003/tests/modules/__init__.py -------------------------------------------------------------------------------- /tests/modules/chadmin.py: -------------------------------------------------------------------------------- 1 | from ch_tools.common import logging 2 | 3 | 4 | class Chadmin: 5 | def __init__(self, container): 6 | self._container = container 7 | 8 | def exec_cmd(self, cmd): 9 | ch_admin_cmd = f"chadmin {cmd}" 10 | logging.debug("chadmin command: {}", ch_admin_cmd) 11 | 12 | result = self._container.exec_run(["bash", "-c", ch_admin_cmd], user="root") 13 | return result 14 | 15 | def create_zk_node(self, zk_node, no_ch_config=False, recursive=True): 16 | cmd = "zookeeper {use_config} create {make_parents} {node}".format( 17 | use_config="--no-ch-config" if no_ch_config else "", 18 | make_parents="--make-parents" if recursive else "", 19 | node=zk_node, 20 | ) 21 | return self.exec_cmd(cmd) 22 | 23 | def zk_delete(self, zk_nodes, no_ch_config=False): 24 | cmd = "zookeeper {use_config} delete {nodes}".format( 25 | use_config="--no-ch-config" if no_ch_config else "", 26 | nodes=zk_nodes, 27 | ) 28 | return self.exec_cmd(cmd) 29 | 30 | def zk_list(self, zk_node, no_ch_config=False): 31 | cmd = "zookeeper {use_config} list {node}".format( 32 | use_config="--no-ch-config" if no_ch_config else "", 33 | node=zk_node, 34 | ) 35 | return 
self.exec_cmd(cmd) 36 | 37 | def zk_cleanup(self, fqdn, zk_root=None, no_ch_config=False, dry_run=False): 38 | cmd = "zookeeper {use_config} {root} cleanup-removed-hosts-metadata {hosts} {dry}".format( 39 | use_config="--no-ch-config" if no_ch_config else "", 40 | root=f"--chroot {zk_root}" if zk_root else "", 41 | hosts=fqdn, 42 | dry="" if not dry_run else "--dry-run", 43 | ) 44 | return self.exec_cmd(cmd) 45 | 46 | def zk_cleanup_table(self, fqdn, zk_table_path_): 47 | cmd = "zookeeper remove-hosts-from-table {zk_table_path} {hosts}".format( 48 | zk_table_path=zk_table_path_, 49 | hosts=fqdn, 50 | ) 51 | return self.exec_cmd(cmd) 52 | -------------------------------------------------------------------------------- /tests/modules/logs.py: -------------------------------------------------------------------------------- 1 | """ 2 | Logs management. 3 | """ 4 | 5 | import json 6 | import os 7 | 8 | from docker.models.containers import Container 9 | 10 | from ch_tools.common import logging 11 | 12 | from .docker import copy_container_dir, get_containers 13 | from .minio import export_s3_data 14 | from .typing import ContextT 15 | 16 | 17 | def save_logs(context: ContextT) -> None: 18 | """ 19 | Save logs and support materials. 
20 | """ 21 | try: 22 | logs_dir = os.path.join(context.conf["staging_dir"], "logs") 23 | 24 | for container in get_containers(context): 25 | _save_container_logs(container, logs_dir) 26 | 27 | with open( 28 | os.path.join(logs_dir, "session_conf.json"), "w", encoding="utf-8" 29 | ) as out: 30 | json.dump(context.conf, out, default=repr, indent=4) 31 | 32 | export_s3_data(context, logs_dir) 33 | 34 | except Exception: 35 | logging.exception("Failed to save logs") 36 | raise 37 | 38 | 39 | def _save_container_logs(container: Container, logs_dir: str) -> None: 40 | base = os.path.join(logs_dir, container.name) 41 | os.makedirs(base, exist_ok=True) 42 | with open(os.path.join(base, "docker.log"), "wb") as out: 43 | out.write(container.logs(stdout=True, stderr=True, timestamps=True)) 44 | 45 | copy_container_dir(container, "/var/log", base) 46 | -------------------------------------------------------------------------------- /tests/modules/minio.py: -------------------------------------------------------------------------------- 1 | """ 2 | Interface to Minio S3 server. 3 | """ 4 | 5 | import json 6 | import os 7 | 8 | from docker.models.containers import Container 9 | from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed 10 | 11 | from .docker import copy_container_dir, get_container 12 | from .typing import ContextT 13 | 14 | 15 | class MinioException(Exception): 16 | """ 17 | Minio exception.
18 | """ 19 | 20 | def __init__(self, response: dict) -> None: 21 | super().__init__(self._fmt_message(response)) 22 | self.response = response 23 | 24 | @staticmethod 25 | def _fmt_message(response: dict) -> str: 26 | try: 27 | error = response["error"] 28 | message = f'{error["message"]} Cause: {error["cause"]["message"]}' 29 | 30 | code = error["cause"]["error"].get("Code") 31 | if code: 32 | message = f"{message} [{code}]" 33 | 34 | return message 35 | 36 | except Exception: 37 | return f"Failed with response: {response}" 38 | 39 | 40 | class BucketAlreadyOwnedByYou(MinioException): 41 | """ 42 | Minio exception for the BucketAlreadyOwnedByYou error code. 43 | """ 44 | 45 | pass 46 | 47 | 48 | def initialize(context: ContextT) -> None: 49 | """ 50 | Initialize Minio server. 51 | """ 52 | _configure_s3_credentials(context) 53 | _create_s3_bucket(context) 54 | 55 | 56 | def export_s3_data(context: ContextT, path: str) -> None: 57 | """ 58 | Export S3 data to the specified directory. 59 | """ 60 | local_dir = os.path.join(path, "minio") 61 | copy_container_dir(_container(context), "/export", local_dir) 62 | 63 | 64 | @retry( 65 | retry=retry_if_exception_type(MinioException), 66 | wait=wait_fixed(1), 67 | stop=stop_after_attempt(10), 68 | ) 69 | def _configure_s3_credentials(context: ContextT) -> None: 70 | """ 71 | Configure S3 credentials in mc (Minio client). 72 | """ 73 | access_key = context.conf["s3"]["access_key_id"] 74 | secret_key = context.conf["s3"]["access_secret_key"] 75 | _mc_execute( 76 | context, 77 | f"config host add local http://localhost:9000 {access_key} {secret_key}", 78 | ) 79 | 80 | 81 | def _create_s3_bucket(context: ContextT) -> None: 82 | """ 83 | Create S3 bucket specified in the config.
84 | """ 85 | bucket = context.conf["s3"]["bucket"] 86 | try: 87 | _mc_execute(context, f"mb local/{bucket}") 88 | except BucketAlreadyOwnedByYou: 89 | pass 90 | 91 | 92 | def _container(context: ContextT) -> Container: 93 | return get_container(context, "minio01") 94 | 95 | 96 | def _mc_execute(context: ContextT, command: str) -> dict: 97 | """ 98 | Execute mc (Minio client) command. 99 | """ 100 | output = _container(context).exec_run(f"mcli --json {command}").output.decode() 101 | response = json.loads(output) 102 | if response["status"] == "success": 103 | return response 104 | 105 | error_code = response["error"]["cause"]["error"].get("Code") 106 | exception_types = { 107 | "BucketAlreadyOwnedByYou": BucketAlreadyOwnedByYou, 108 | } 109 | raise exception_types.get(error_code, MinioException)(response) 110 | -------------------------------------------------------------------------------- /tests/modules/s3.py: -------------------------------------------------------------------------------- 1 | """ 2 | S3 client. 3 | """ 4 | 5 | from typing import List, Optional 6 | 7 | import boto3 8 | from botocore.client import Config 9 | from botocore.errorfactory import ClientError 10 | 11 | from ch_tools.common import logging 12 | 13 | from . import docker 14 | from .typing import ContextT 15 | 16 | 17 | class S3Client: 18 | """ 19 | S3 client. 
20 | """ 21 | 22 | def __init__(self, context: ContextT, bucket: Optional[str] = None) -> None: 23 | config = context.conf["s3"] 24 | boto_config = config["boto_config"] 25 | self._s3_session = boto3.session.Session( 26 | aws_access_key_id=config["access_key_id"], 27 | aws_secret_access_key=config["access_secret_key"], 28 | region_name=boto_config["region_name"], 29 | ) 30 | 31 | host, port = docker.get_exposed_port( 32 | docker.get_container(context, context.conf["s3"]["container"]), 33 | context.conf["s3"]["port"], 34 | ) 35 | endpoint_url = f"http://{host}:{port}" 36 | self._s3_client = self._s3_session.client( 37 | service_name="s3", 38 | endpoint_url=endpoint_url, 39 | config=Config( 40 | s3={ 41 | "addressing_style": boto_config["addressing_style"], 42 | "region_name": boto_config["region_name"], 43 | }, 44 | ), 45 | ) 46 | 47 | self._s3_bucket_name = bucket if bucket else config["bucket"] 48 | 49 | for module_logger in ("boto3", "botocore", "s3transfer", "urllib3"): 50 | logging.set_module_log_level(module_logger, logging.CRITICAL) 51 | 52 | def upload_data(self, data: bytes, remote_path: str) -> None: 53 | """ 54 | Upload given bytes or file-like object. 55 | """ 56 | remote_path = remote_path.lstrip("/") 57 | self._s3_client.put_object( 58 | Body=data, Bucket=self._s3_bucket_name, Key=remote_path 59 | ) 60 | 61 | def delete_data(self, remote_path: str) -> None: 62 | """ 63 | Delete file from storage. 64 | """ 65 | remote_path = remote_path.lstrip("/") 66 | self._s3_client.delete_object(Bucket=self._s3_bucket_name, Key=remote_path) 67 | 68 | def path_exists(self, remote_path: str) -> bool: 69 | """ 70 | Check if remote path exists. 71 | """ 72 | try: 73 | self._s3_client.head_object(Bucket=self._s3_bucket_name, Key=remote_path) 74 | return True 75 | except ClientError: 76 | return False 77 | 78 | def list_objects(self, prefix: str) -> List[str]: 79 | """ 80 | List all objects with given prefix. 
81 | """ 82 | contents = [] 83 | paginator = self._s3_client.get_paginator("list_objects") 84 | list_object_kwargs = dict(Bucket=self._s3_bucket_name, Prefix=prefix) 85 | 86 | for result in paginator.paginate(**list_object_kwargs): 87 | if result.get("CommonPrefixes") is not None: 88 | for dir_prefix in result.get("CommonPrefixes"): 89 | contents.append(dir_prefix.get("Prefix")) 90 | 91 | if result.get("Contents") is not None: 92 | for file_key in result.get("Contents"): 93 | contents.append(file_key.get("Key")) 94 | 95 | return contents 96 | -------------------------------------------------------------------------------- /tests/modules/steps.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions to use in implementation of test steps. 3 | """ 4 | 5 | from typing import Any 6 | 7 | import yaml 8 | 9 | from .templates import render_template 10 | from .typing import ContextT 11 | 12 | 13 | def get_step_data(context: ContextT) -> Any: 14 | """ 15 | Return step data deserialized from YAML representation and processed by 16 | template engine. 17 | """ 18 | if not context.text: 19 | return {} 20 | 21 | return yaml.load(render_template(context, context.text), yaml.SafeLoader) 22 | -------------------------------------------------------------------------------- /tests/modules/typing.py: -------------------------------------------------------------------------------- 1 | """ 2 | Type definitions. 3 | """ 4 | 5 | from types import SimpleNamespace 6 | from typing import Union 7 | 8 | from behave.runner import Context 9 | 10 | ContextT = Union[Context, SimpleNamespace] # pylint: disable=invalid-name 11 | -------------------------------------------------------------------------------- /tests/modules/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions. 
3 | """ 4 | 5 | import string 6 | from functools import wraps 7 | from random import choice as random_choise 8 | from types import SimpleNamespace 9 | from typing import Mapping, MutableMapping 10 | 11 | from packaging.version import parse as parse_version 12 | 13 | from ch_tools.common import logging 14 | 15 | from .typing import ContextT 16 | 17 | 18 | def merge(original, update): 19 | """ 20 | Recursively merge update dict into original. 21 | """ 22 | for key in update: 23 | recurse_conditions = [ 24 | key in original, 25 | isinstance(original.get(key), MutableMapping), 26 | isinstance(update.get(key), Mapping), 27 | ] 28 | if all(recurse_conditions): 29 | merge(original[key], update[key]) 30 | else: 31 | original[key] = update[key] 32 | return original 33 | 34 | 35 | def env_stage(event, fail=False): 36 | """ 37 | Nicely logs env stage. 38 | """ 39 | 40 | def wrapper(fun): 41 | @wraps(fun) 42 | def _wrapped_fun(*args, **kwargs): 43 | stage_name = f"{fun.__module__}.{fun.__name__}" 44 | logging.info("initiating {} stage {}", event, stage_name) 45 | try: 46 | return fun(*args, **kwargs) 47 | except Exception as e: 48 | logging.error("{} failed: {!r}", stage_name, e) 49 | if fail: 50 | raise 51 | 52 | return _wrapped_fun 53 | 54 | return wrapper 55 | 56 | 57 | def generate_random_string(length: int = 64) -> str: 58 | """ 59 | Generate random alphanum sequence. 60 | """ 61 | return "".join( 62 | random_choise(string.ascii_letters + string.digits) for _ in range(length) 63 | ) 64 | 65 | 66 | def context_to_dict(context: ContextT) -> dict: 67 | """ 68 | Convert context to dict representation. 69 | 70 | The context type can be either types.SimpleNamespace or behave.Context. 
71 | """ 72 | if isinstance(context, SimpleNamespace): 73 | return context.__dict__ 74 | 75 | result: dict = {} 76 | for frame in context._stack: # pylint: disable=protected-access 77 | for key, value in frame.items(): 78 | if key not in result: 79 | result[key] = value 80 | 81 | return result 82 | 83 | 84 | def version_ge(current_version, comparing_version): 85 | """ 86 | Return True if `current_version` is greater or equal than `comparing_version`, or False otherwise. 87 | """ 88 | # "latest" is greater or equal than any known version 89 | if current_version == "latest": 90 | return True 91 | 92 | return parse_version(current_version) >= parse_version(comparing_version) # type: ignore 93 | 94 | 95 | def version_lt(current_version, comparing_version): 96 | """ 97 | Return True if `current_version` is less than `comparing_version`, or False otherwise. 98 | """ 99 | # "latest" is not less than any known version 100 | if current_version == "latest": 101 | return False 102 | 103 | return parse_version(current_version) < parse_version(comparing_version) # type: ignore 104 | -------------------------------------------------------------------------------- /tests/steps/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/ch-tools/93cdec05ba9456e3b6d824ca19cf2d592ba5b003/tests/steps/__init__.py -------------------------------------------------------------------------------- /tests/steps/chadmin.py: -------------------------------------------------------------------------------- 1 | """ 2 | Steps for interacting with chadmin. 
def _assert_success(result) -> None:
    """
    Fail the step unless the chadmin command finished with exit code 0.

    The decoded, stripped command output is put into the assertion message.
    """
    assert result.exit_code == 0, f" output:\n {result.output.decode().strip()}"


@when("we execute chadmin create zk nodes on {node:w}")
def step_create_(context: ContextT, node: str) -> None:
    """
    Create ZooKeeper nodes with chadmin inside the `node` container.

    The node paths are read from the step text, one path per line.
    """
    chadmin = Chadmin(get_container(context, node))

    # The loop variable is intentionally not called `node`: that name belongs to
    # the step argument and must not be overwritten.
    for zk_node in context.text.strip().split("\n"):
        _assert_success(chadmin.create_zk_node(zk_node))


@when("we do hosts cleanup on {node} with fqdn {fqdn} and zk root {zk_root}")
def step_host_cleanup_with_zk_root(
    context: ContextT, node: str, fqdn: str, zk_root: str
) -> None:
    """
    Run chadmin ZooKeeper cleanup for `fqdn` with the given `zk_root` on `node`.
    """
    container = get_container(context, node)
    _assert_success(Chadmin(container).zk_cleanup(fqdn, zk_root))


@when("we do hosts dry cleanup on {node} with fqdn {fqdn} and zk root {zk_root}")
def step_host_dry_cleanup_with_zk_root(
    context: ContextT, node: str, fqdn: str, zk_root: str
) -> None:
    """
    Same as the cleanup step with zk root, but chadmin is called with `dry_run=True`.
    """
    container = get_container(context, node)
    _assert_success(Chadmin(container).zk_cleanup(fqdn, zk_root, dry_run=True))


@when("we do hosts cleanup on {node} with fqdn {fqdn}")
def step_host_cleanup(context: ContextT, node: str, fqdn: str) -> None:
    """
    Run chadmin ZooKeeper cleanup for `fqdn` on `node` without an explicit zk root.

    chadmin is called with `no_ch_config=False`.
    """
    container = get_container(context, node)
    _assert_success(Chadmin(container).zk_cleanup(fqdn, no_ch_config=False))


@when(
    "we do table cleanup on {node} with fqdn {fqdn} from table with {zk_table_path} zookeeper path"
)
def step_table_cleanup(
    context: ContextT, node: str, fqdn: str, zk_table_path: str
) -> None:
    """
    Run chadmin table cleanup for `fqdn` and the table stored at `zk_table_path`.
    """
    container = get_container(context, node)
    _assert_success(Chadmin(container).zk_cleanup_table(fqdn, zk_table_path))


@then("the list of children on {node:w} for zk node {zk_node} are equal to")
def step_childen_list(context: ContextT, node: str, zk_node: str) -> None:
    """
    Check that `chadmin` lists exactly the children given in the step text.

    The command output is compared with the step text plus a trailing newline.
    """
    container = get_container(context, node)
    result = Chadmin(container).zk_list(zk_node)
    assert_that(result.output.decode(), equal_to(context.text + "\n"))


@then("the list of children on {node:w} for zk node {zk_node} are empty")
def step_childen_list_empty(context: ContextT, node: str, zk_node: str) -> None:
    """
    Check that `chadmin` lists no children: the output is a single newline.
    """
    container = get_container(context, node)
    result = Chadmin(container).zk_list(zk_node)
    assert_that(result.output.decode(), equal_to("\n"))


@when("we delete zookeepers nodes {zk_nodes} on {node:w}")
def step_delete_command(context: ContextT, zk_nodes: str, node: str) -> None:
    """
    Delete the given ZooKeeper nodes with chadmin inside the `node` container.
    """
    container = get_container(context, node)
    _assert_success(Chadmin(container).zk_delete(zk_nodes))
@when("we remove key from s3 for partitions database {database} on {node:w}")
def step_check_number_ro_replicas(  # pylint: disable=unused-argument
    context: ContextT, database: str, node: str
) -> None:
    """
    Break the partitions listed in the step data by deleting their objects from S3.

    The step data is a mapping `database -> table -> partitions`. For every listed
    partition the S3 key that backs `columns.txt` of its data part is resolved on
    `node` and collected; all collected keys are deleted afterwards.

    NOTE: the databases to process come from the step data. The `database` step
    argument is required by the step pattern but is not used for the lookup
    (previously it was silently overwritten by the loop variable).
    """
    keys_to_remove = []

    # Get the list of keys in s3 to break the specified partitions.
    for db_name, tables in get_step_data(context).items():
        for table, partitions in tables.items():
            for partition in partitions:
                get_parts_info_query = f"SELECT name, partition, path FROM system.parts where database='{db_name}' and table='{table}' and partition='{partition}'"
                # Get local path on disk to the single data part
                # (first returned row, third column is `path`).
                part_local_path = execute_query(
                    context, node, get_parts_info_query, format_="JSONCompact"
                )["data"][0][2]

                # For this part, get the single object key in s3
                # (first returned row, second column is `remote_path`).
                columns_path = os.path.join(part_local_path, "columns.txt")
                get_object_key_query = f"SELECT concat(path, local_path) AS full_path, remote_path from system.remote_data_paths WHERE disk_name='object_storage' and startsWith(full_path, '{columns_path}')"
                data_object_key = execute_query(
                    context, node, get_object_key_query, format_="JSONCompact"
                )["data"][0][1]
                keys_to_remove.append(data_object_key)

    # Delete only after all keys are resolved, so the lookups see intact data.
    s3_client = s3.S3Client(context)
    for key in keys_to_remove:
        s3_client.delete_data(key)
@given("a working S3")
def step_wait_for_s3_alive(context: ContextT) -> None:
    """
    Make sure S3 can serve requests by running the minio initialization.
    """
    minio.initialize(context)


@then("S3 contains {count:d} objects")
def step_s3_contains_files(context: ContextT, count: int) -> None:
    """
    Check that listing the default bucket returns exactly `count` objects.
    """
    found = s3.S3Client(context).list_objects("")
    reason = f"Objects count = {len(found)}, expected {count}, objects {found}"
    assert_that(len(found), equal_to(count), reason)


@then("S3 contains greater than {count:d} objects")
def step_s3_contains_greater_than_files(context: ContextT, count: int) -> None:
    """
    Check that listing the default bucket returns more than `count` objects.
    """
    found = s3.S3Client(context).list_objects("")
    reason = f"Objects count = {len(found)}, expected greater than {count}, objects {found}"
    assert_that(len(found), greater_than(count), reason)


@then("S3 bucket {bucket} contains {count:d} objects")
def step_cloud_storage_bucket_contains_files(
    context: ContextT, bucket: str, count: int
) -> None:
    """
    Check that listing the given `bucket` returns exactly `count` objects.
    """
    found = s3.S3Client(context, bucket).list_objects("")
    reason = f"Objects count = {len(found)}, expected {count}, objects {found}"
    assert_that(len(found), equal_to(count), reason)


@when("we put object in S3")
def step_put_file_in_s3(context: ContextT) -> None:
    """
    Upload one object described by the step data and verify that it exists.

    The step data provides the `bucket`, `data` and `path` keys.
    """
    step_data = get_step_data(context)
    client = s3.S3Client(context, step_data["bucket"])
    payload = step_data["data"]
    target_path = step_data["path"]

    client.upload_data(payload, target_path)
    assert client.path_exists(step_data["path"])
@given("a working zookeeper")
@retry(wait=wait_fixed(0.5), stop=stop_after_attempt(40))
def step_wait_for_zookeeper_alive(context):
    """
    Ensure that ZK is ready to accept incoming requests.

    The connection attempt is retried up to 40 times with a 0.5 second pause.
    """
    client = _zk_client(context)
    try:
        client.start()
    finally:
        client.stop()


@given("a working keeper on {node:w}")
@retry(wait=wait_fixed(0.5), stop=stop_after_attempt(20))
def step_wait_for_keeper_alive(context, node):
    """
    Wait until clickhouse keeper is ready to accept incoming requests.

    Connects to the keeper on `node` over SSL (port 2281) and reads the root
    node; the attempt is retried up to 20 times with a 0.5 second pause.
    """
    client = _zk_client(context, instance_name=node, port=2281, use_ssl=True)
    try:
        client.start()
        client.get("/")
    finally:
        # `finally` runs on both success and failure and the exception keeps
        # propagating to `retry`, so no separate `except` with a second
        # `stop()` call is needed.
        client.stop()


@given("we have removed ZK metadata for {node:w}")
def clean_zk_tables_metadata_for_host(context, node):
    """
    Remove all metadata for specified host from ZK.

    The whole tree is walked from the root; every child whose name equals
    `node` is deleted recursively, all other children are descended into.
    """

    def remove_matching_children(zk_client, path):
        for child in zk_client.get_children(path):
            child_path = os.path.join(path, child)
            if child == node:
                zk_client.delete(child_path, recursive=True)
            else:
                remove_matching_children(zk_client, child_path)

    client = _zk_client(context)

    try:
        client.start()
        remove_matching_children(client, "/")
    finally:
        client.stop()


@then('we get zookeeper node with "{path}" path')
def step_get_zk_node(context, path):
    """
    Read the ZooKeeper node at `path` and print its decoded, stripped value.
    """
    client = _zk_client(context)

    try:
        client.start()
        result = client.get(path)[0].decode().strip()
    finally:
        client.stop()

    print(result)


def _zk_client(context, instance_name="zookeeper01", port=2181, use_ssl=False):
    """
    Build a (not yet started) KazooClient for the given container.

    `port` is the port inside the container; the client connects to the
    host/port pair that `get_exposed_port` returns for it. Certificates are
    not verified.
    """
    # Only critical kazoo messages are kept in the log.
    logging.set_module_log_level("kazoo", logging.CRITICAL)

    zk_container = get_container(context, instance_name)
    host, exposed_port = get_exposed_port(zk_container, port)

    return KazooClient(f"{host}:{exposed_port}", use_ssl=use_ssl, verify_certs=False)
@pytest.mark.parametrize(
    "version",
    [
        "22.8.21.38",
        "24.8.15.1",
        "24.10.4.191",
        "24.10.4.191-lts",
        "24.10.4.191-lts-645f29c",
    ],
)
def test_validate_version(version):
    """
    Every listed version string, with or without a suffix, must be accepted.

    The test passes when `validate_version` completes without raising.
    """
    validate_version(version)


@pytest.mark.parametrize(
    "version,min_version",
    [
        ("22.8.21.38", "22.8.21.38"),
        ("24.10.4.191", "22.8.21.38"),
        ("24.10.4.191-lts", "22.8.21.38"),
        ("24.10.4.191", "22.8.21.38-lts"),
        ("24.10.4.191-lts", "22.8.21.38-lts"),
        ("24.10.4.191-lts-34d87c12a", "22.8.21.38"),
        ("24.10.4.191", "22.8.21.38-lts-34d87c12a"),
    ],
)
def test_match_str_ch_version(version, min_version):
    """
    In every listed pair `version` is not older than `min_version`.

    Suffixes such as `-lts` or `-lts-<hash>` may be present on either side.
    """
    # The result used to be discarded, so a wrong comparison could not fail the test.
    # NOTE(review): assumes match_str_ch_version returns a truthy value when
    # version >= min_version - confirm against ch_tools.chadmin.internal.system.
    assert match_str_ch_version(version, min_version)
DateTime DEFAULT now() 8 | ) 9 | ENGINE 10 | PARTITION BY toMonth(date_key) 11 | ORDER BY (generation, date_key) 12 | TTL expired + toIntervalMinute(3) TO DISK 'object_storage' 13 | SETTINGS index_granularity = 819 -------------------------------------------------------------------------------- /tests/unit/common/clickhouse/metadata/broken_no_engine_full.sql: -------------------------------------------------------------------------------- 1 | ATTACH TABLE _ UUID '9612256b-b461-4df5-8015-72f9727d1f95' 2 | ( 3 | `generation` UInt64, 4 | `date_key` DateTime, 5 | `number` UInt64, 6 | `text` String, 7 | `expired` DateTime DEFAULT now() 8 | ) 9 | PARTITION BY toMonth(date_key) 10 | ORDER BY (generation, date_key) 11 | TTL expired + toIntervalMinute(3) TO DISK 'object_storage' 12 | SETTINGS index_granularity = 819 -------------------------------------------------------------------------------- /tests/unit/common/clickhouse/metadata/broken_no_uuid.sql: -------------------------------------------------------------------------------- 1 | ATTACH TABLE _ UUID 2 | ( 3 | `generation` UInt64, 4 | `date_key` DateTime, 5 | `number` UInt64, 6 | `text` String, 7 | `expired` DateTime DEFAULT now() 8 | ) 9 | ENGINE = MergeTree 10 | PARTITION BY toMonth(date_key) 11 | ORDER BY (generation, date_key) 12 | TTL expired + toIntervalMinute(3) TO DISK 'object_storage' 13 | SETTINGS index_granularity = 819 -------------------------------------------------------------------------------- /tests/unit/common/clickhouse/metadata/broken_no_uuid_full.sql: -------------------------------------------------------------------------------- 1 | ATTACH TABLE _ 2 | ( 3 | `generation` UInt64, 4 | `date_key` DateTime, 5 | `number` UInt64, 6 | `text` String, 7 | `expired` DateTime DEFAULT now() 8 | ) 9 | ENGINE = MergeTree 10 | PARTITION BY toMonth(date_key) 11 | ORDER BY (generation, date_key) 12 | TTL expired + toIntervalMinute(3) TO DISK 'object_storage' 13 | SETTINGS index_granularity = 819 
-------------------------------------------------------------------------------- /tests/unit/common/clickhouse/metadata/broken_uuid.sql: -------------------------------------------------------------------------------- 1 | ATTACH TABLE _ UUID 'b461-4df5-8015-72f9727d1f95' 2 | ( 3 | `generation` UInt64, 4 | `date_key` DateTime, 5 | `number` UInt64, 6 | `text` String, 7 | `expired` DateTime DEFAULT now() 8 | ) 9 | ENGINE = MergeTree 10 | PARTITION BY toMonth(date_key) 11 | ORDER BY (generation, date_key) 12 | TTL expired + toIntervalMinute(3) TO DISK 'object_storage' 13 | SETTINGS index_granularity = 819 -------------------------------------------------------------------------------- /tests/unit/common/clickhouse/metadata/table_aggregating_merge_tree.sql: -------------------------------------------------------------------------------- 1 | ATTACH TABLE _ UUID '40c7c9a8-7451-4a10-8b43-443436f33413' 2 | ( 3 | `StartDate` DateTime64(3), 4 | `CounterID` UInt64, 5 | `Visits` AggregateFunction(sum, Nullable(Int32)), 6 | `Users` AggregateFunction(uniq, Nullable(Int32)) 7 | ) 8 | ENGINE = AggregatingMergeTree 9 | ORDER BY (StartDate, CounterID) 10 | SETTINGS index_granularity = 8192 -------------------------------------------------------------------------------- /tests/unit/common/clickhouse/metadata/table_collapsing_merge_tree.sql: -------------------------------------------------------------------------------- 1 | ATTACH TABLE _ UUID '122b369e-3866-4c2b-8ca1-9e07c75ecee0' 2 | ( 3 | `UserID` UInt64, 4 | `PageViews` UInt8, 5 | `Duration` UInt8, 6 | `Sign` Int8 7 | ) 8 | ENGINE = CollapsingMergeTree(Sign) 9 | ORDER BY UserID 10 | SETTINGS index_granularity = 8192 -------------------------------------------------------------------------------- /tests/unit/common/clickhouse/metadata/table_merge_tree.sql: -------------------------------------------------------------------------------- 1 | ATTACH TABLE _ UUID '9612256b-b461-4df5-8015-72f9727d1f95' 2 | ( 3 | `generation` UInt64, 4 | 
`date_key` DateTime, 5 | `number` UInt64, 6 | `text` String, 7 | `expired` DateTime DEFAULT now() 8 | ) 9 | ENGINE = MergeTree 10 | PARTITION BY toMonth(date_key) 11 | ORDER BY (generation, date_key) 12 | TTL expired + toIntervalMinute(3) TO DISK 'object_storage' 13 | SETTINGS index_granularity = 819 -------------------------------------------------------------------------------- /tests/unit/common/clickhouse/metadata/table_merge_tree_field_engine.sql: -------------------------------------------------------------------------------- 1 | ATTACH TABLE _ UUID '9612256b-b461-4df5-8015-72f9727d1f95' 2 | ( 3 | `generation` UInt64, 4 | `date_key` DateTime, 5 | `number` UInt64, 6 | `ENGINE` String, 7 | `expired` DateTime DEFAULT now() 8 | ) 9 | ENGINE = MergeTree 10 | PARTITION BY toMonth(date_key) 11 | ORDER BY (generation, date_key) 12 | TTL expired + toIntervalMinute(3) TO DISK 'object_storage' 13 | SETTINGS index_granularity = 819 -------------------------------------------------------------------------------- /tests/unit/common/clickhouse/metadata/table_merge_tree_field_uuid.sql: -------------------------------------------------------------------------------- 1 | ATTACH TABLE _ UUID '9612256b-b461-4df5-8015-72f9727d1f95' 2 | ( 3 | `generation` UInt64, 4 | `date_key` DateTime, 5 | `number` UInt64, 6 | `UUID` String, 7 | `expired` DateTime DEFAULT now() 8 | ) 9 | ENGINE = MergeTree 10 | PARTITION BY toMonth(date_key) 11 | ORDER BY (generation, date_key) 12 | TTL expired + toIntervalMinute(3) TO DISK 'object_storage' 13 | SETTINGS index_granularity = 819 -------------------------------------------------------------------------------- /tests/unit/common/clickhouse/metadata/table_replacing_merge_tree.sql: -------------------------------------------------------------------------------- 1 | ATTACH TABLE _ UUID 'c322e832-2628-45f9-b2f5-fd659078c5c2' 2 | ( 3 | `key` Int64, 4 | `someCol` String, 5 | `eventTime` DateTime 6 | ) 7 | ENGINE = ReplacingMergeTree 8 | ORDER BY key 9 | 
SETTINGS index_granularity = 8192 -------------------------------------------------------------------------------- /tests/unit/common/clickhouse/metadata/table_replicated_aggregating_merge_tree.sql: -------------------------------------------------------------------------------- 1 | ATTACH TABLE _ UUID '8ac44a5e-091e-4dc4-9eb0-0ba577b3afd7' 2 | ( 3 | `id` UInt32, 4 | `value` AggregateFunction(sum, UInt32) 5 | ) 6 | ENGINE = ReplicatedAggregatingMergeTree('/clickhouse/tables/{shard}/example_replicated_aggregating_mergetree', '{replica}') 7 | ORDER BY id 8 | SETTINGS index_granularity = 8192 -------------------------------------------------------------------------------- /tests/unit/common/clickhouse/metadata/table_replicated_collapsing_merge_tree.sql: -------------------------------------------------------------------------------- 1 | ATTACH TABLE _ UUID '9317cb30-1efd-44bd-ab88-d0e3a025965a' 2 | ( 3 | `id` UInt32, 4 | `value` Int32, 5 | `sign` Int8 6 | ) 7 | ENGINE = ReplicatedCollapsingMergeTree('/clickhouse/tables/{shard}/example_replicated_collapsing_mergetree', '{replica}', sign) 8 | ORDER BY id 9 | SETTINGS index_granularity = 8192 -------------------------------------------------------------------------------- /tests/unit/common/clickhouse/metadata/table_replicated_merge_tree.sql: -------------------------------------------------------------------------------- 1 | ATTACH TABLE _ UUID 'f438d816-605d-4fe0-a9cb-4edba3ce72dd' 2 | ( 3 | `generation` UInt64, 4 | `date_key` DateTime, 5 | `number` UInt64, 6 | `text` String, 7 | `expired` DateTime DEFAULT now() 8 | ) 9 | ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/test_table_repl', '{replica}') 10 | PARTITION BY toMonth(date_key) 11 | ORDER BY (generation, date_key) 12 | SETTINGS index_granularity = 8192 -------------------------------------------------------------------------------- /tests/unit/common/clickhouse/metadata/table_replicated_merge_tree_ver.sql: 
-------------------------------------------------------------------------------- 1 | ATTACH TABLE _ UUID 'f438d816-605d-4fe0-a9cb-4edba3ce72dd' 2 | ( 3 | `generation` UInt64, 4 | `date_key` DateTime, 5 | `number` UInt64, 6 | `text` String, 7 | `expired` DateTime DEFAULT now() 8 | ) 9 | ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/test_table_repl1', '{replica}', ver) 10 | PARTITION BY toMonth(date_key) 11 | ORDER BY (generation, date_key) 12 | SETTINGS index_granularity = 8192 -------------------------------------------------------------------------------- /tests/unit/common/clickhouse/metadata/table_replicated_replacing_merge_tree.sql: -------------------------------------------------------------------------------- 1 | ATTACH TABLE _ UUID '4ce817e2-8043-4655-869e-eeab3edeae6a' 2 | ( 3 | `D` Date, 4 | `ID` Int64, 5 | `Ver` UInt64 6 | ) 7 | ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/tableName/{shard}/', '{replica}', Ver) 8 | PARTITION BY toYYYYMM(D) 9 | ORDER BY ID 10 | SETTINGS index_granularity = 8192 -------------------------------------------------------------------------------- /tests/unit/common/clickhouse/metadata/table_replicated_summing_merge_tree.sql: -------------------------------------------------------------------------------- 1 | ATTACH TABLE _ UUID '72b4c520-9cc2-4549-ba6c-bd952bb049d8' 2 | ( 3 | `D` Date, 4 | `ID` Int64, 5 | `Ver` UInt64 6 | ) 7 | ENGINE = ReplicatedSummingMergeTree('/clickhouse/tables/tableName/{shard}/1', '{replica}', Ver) 8 | PARTITION BY toYYYYMM(D) 9 | ORDER BY ID 10 | SETTINGS index_granularity = 8192 -------------------------------------------------------------------------------- /tests/unit/common/clickhouse/metadata/table_replicated_versioned_collapsing_merge_tree.sql: -------------------------------------------------------------------------------- 1 | ATTACH TABLE _ UUID '10ccbec1-6b78-48fe-a51a-fb7c9f7fbe4a' 2 | ( 3 | `id` UInt32, 4 | `value` Int32, 5 | `sign` Int8, 6 | `version` UInt32 7 | ) 8 | 
@pytest.mark.parametrize(
    "hostname, result",
    [
        ("zone-hostname.database.urs.net", "zone%2Dhostname%2Edatabase%2Eurs%2Enet"),
    ],
)
def test_config(hostname, result):
    """
    Escaping a host name for ZooKeeper must give the expected string.
    """
    escaped = escape_for_zookeeper(hostname)
    assert escaped == result
# Shared fixtures: a query with an escaped placeholder and a sensitive argument,
# a query without arguments, and a query with only a sensitive argument.
query_1 = Query(
    "SELECT * FROM users WHERE name = {{name}} AND password = {password}",
    {"password": "123"},
)
query_2 = Query("SELECT * FROM users WHERE name = {name}", {})
query_3 = Query("SELECT * FROM users WHERE password = {password}", {"password": "123"})


def test_for_execute() -> None:
    """
    `for_execute()` substitutes the real values of sensitive arguments.
    """
    assert (
        query_1.for_execute()
        == "SELECT * FROM users WHERE name = {name} AND password = 123"
    )
    assert query_2.for_execute() == "SELECT * FROM users WHERE name = {name}"
    assert query_3.for_execute() == "SELECT * FROM users WHERE password = 123"


def test_str() -> None:
    """
    `str()` masks the values of sensitive arguments with `*****`.
    """
    assert (
        str(query_1) == "SELECT * FROM users WHERE name = {name} AND password = *****"
    )
    assert str(query_2) == "SELECT * FROM users WHERE name = {name}"
    assert str(query_3) == "SELECT * FROM users WHERE password = *****"


def test_repr() -> None:
    """
    `repr()` masks sensitive values both in the query text and in the arguments.
    """
    assert (
        repr(query_1)
        == "Query(value='SELECT * FROM users WHERE name = {name} AND password = *****', sensitive_args={'password': '*****'})"
    )
    assert (
        repr(query_2)
        == "Query(value='SELECT * FROM users WHERE name = {name}', sensitive_args={})"
    )
    assert (
        repr(query_3)
        == "Query(value='SELECT * FROM users WHERE password = *****', sensitive_args={'password': '*****'})"
    )


def test_eq_and_hash() -> None:
    """
    Two queries built from the same value and arguments are equal and hash alike.
    """
    # Named `query_1_copy` so it does not shadow the module-level `query_2`.
    query_1_copy = Query(query_1.value, query_1.sensitive_args)
    assert query_1 == query_1_copy
    assert hash(query_1) == hash(query_1_copy)


def test_add() -> None:
    """
    Appending a string to a query keeps the sensitive values masked in `str()`.
    """
    added_query = query_1 + " LIMIT 10"
    assert (
        str(added_query)
        == "SELECT * FROM users WHERE name = {name} AND password = ***** LIMIT 10"
    )
Sequence[str], summed_expected: T 43 | ) -> None: 44 | stringified: Sequence[str] = [str(i) for i in inputs] 45 | assert_that(stringified, equal_to(stringified_expected)) 46 | 47 | summed = reduce(lambda a, b: a + b, inputs) # type: ignore 48 | assert_that(summed, equal_to(summed_expected)) 49 | -------------------------------------------------------------------------------- /tests/unit/monrun/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/ch-tools/93cdec05ba9456e3b6d824ca19cf2d592ba5b003/tests/unit/monrun/__init__.py --------------------------------------------------------------------------------